pax_global_header00006660000000000000000000000064132414363270014517gustar00rootroot0000000000000052 comment=c91f11dd8f680e69bc3726ad5cb77a46fb905c7b googler-3.5/000077500000000000000000000000001324143632700130245ustar00rootroot00000000000000googler-3.5/.github/000077500000000000000000000000001324143632700143645ustar00rootroot00000000000000googler-3.5/.github/ISSUE_TEMPLATE.md000066400000000000000000000023541324143632700170750ustar00rootroot00000000000000#### Bug reports Before opening an issue, please try to reproduce on [the latest development version](https://github.com/jarun/googler#installing-from-this-repository) first. The bug you noticed might have already been fixed. If the issue can be reproduced on master, then please make sure you provide the following: - Output of `googler -d`; - Link to the response body (you should see a line like `[DEBUG] Response body written to '/Volumes/ramdisk/googler-response-xxxxxxxx'` in the output of `googler -d`; please upload the file to a [gist](https://gist.github.com/) and include the gist's URL in the issue); - Details of operating system, Python version used, terminal emulator and shell; - `locale` output, if relevant. It's a good idea to set your locale to UTF-8. Please refer to [googler #131](https://github.com/jarun/googler/issues/131). If we need more information and there is no communication from the bug reporter within 7 days from the date of request, we will close the issue. If you have relevant information, resume discussion any time. #### Feature requests Please consider contributing the feature back to `googler` yourself. Feel free to discuss. We are more than happy to help. 
--- PLEASE DELETE THIS LINE AND EVERYTHING ABOVE --- googler-3.5/.gitignore000066400000000000000000000000231324143632700150070ustar00rootroot00000000000000build/ dist/ *.bak googler-3.5/.travis.yml000066400000000000000000000024011324143632700151320ustar00rootroot00000000000000language: python python: - "3.4" - "3.5" - "3.6" sudo: required services: - docker dist: trusty before_install: - "pip install --upgrade setuptools" - "pip install --upgrade pip" script: - ./tests/ci-test-wrapper --watch .travis.yml before_deploy: - sudo apt-get update -qy - sudo apt-get install -qy python3 python3-pip - python3 -m pip install packagecore - packagecore -o dist/ "${TRAVIS_TAG#v}" deploy: provider: releases api_key: secure: g00YmDCb9gszABGJfzTyWeSy6zvvHCRI+vVjvJV7ubgs7L3JUeXfIfLeHZ4fUCX6RjhsOMwgQIIJVVtK5kgUI9YkBRJWdVo7jmJRefkxwTu2SF5SwDjYumiE6mqQlGCfo7OcV0/a/T5ipt5JyBUtY7DIMB2/wyz3jLPCvx4/aQo6COw0tKzaXgiXm0eJz6biEkc3QwGkFUNJFKgmvuvi+FYBJU21fD4cto8ck9i/0IUjsGxbExSpMKbe8bKj3BVh4dyMvZ6e+I/y2l2MM/RazssgFpiqabBm61CL4XCKxXzBsOgjhHSts7y+oWD3YnQqeaeugO5c6d8NPF5LSQk6VYEWYNwt3tdov/6zMMJohyN975AncjtwISzFzBZhAcXUeFxzs+6lfdNZc5lCE9gl+G9gUjWQ/0xyEFtT3m3kyjLlofoqiLwGv/+liaFSSxUpQd2ZHu5OoRjHcCJlxDd+9ppFDjV267zaa4eVBut87PP0QamUWSHEQW4tIuWLujCgJOn+IAMJTppNJ06q5RShV3ilbwlL+SkDSRUS79bLn5ELEgFADTJtUZwLU33VsYgTJ2Ktauu27kvjGitVTR+MgRzc2MVY1Gm6wN8AIe1HQj9YV0ePFp656Q8W29UnuN3QAfPwi2o5SimkNTrtp3MtQhq2nCBB5WqDv5L+DEwj654= file_glob: true file: - dist/* skip_cleanup: true on: tags: true repo: jarun/googler python: "3.6" googler-3.5/CHANGELOG000066400000000000000000000250631324143632700142440ustar00rootroot00000000000000googler 3.5 2018-02-16 What's in? - URL folding to show only domain name - Omniprompt key `c` to copy URL to clipboard - Support env var `DISABLE_PROMPT_COLOR` to disable prompt color (see #203) Note: Python 3.3 reached EOL, will not be supported anymore. ------------------------------------------------------------------------------- googler 3.4 2017-10-02 What's in? 
- Support custom URL handler script or cli utility (option `--url-handler`) - Support text browser override with GUI browser (omniprompt key `O`) - A stunning project logo! (designed by @zmwangx) ------------------------------------------------------------------------------- googler 3.3 2017-08-17 What's in? - Search auto-completion (using completion scripts) - Python 3.6 support - Automated release package builds using PackageCore ------------------------------------------------------------------------------- googler 3.2 2017-07-07 What's in? - Basic authentication with `--proxy` - Option `--unfilter` to include similar results - New googler @ts : Manga Reader, Mac Rumors, OMG! Ubuntu! - Fix: skip certain card results with `--noua` - options `--json` and `--exact` decoupled ------------------------------------------------------------------------------- googler 3.1 2017-04-28 What's in? - Search result metadata (e.g. IMDB rating) - Multi-site search - Browse numeric ranges at omniprompt - googler@ - Financial Times, The Pirate Bay added ------------------------------------------------------------------------------- googler 3.0 2017-03-12 Modifications - Introducing [googler @t](https://github.com/jarun/googler#googler-t) add-on! - Open multiple indices from omniprompt - Open all indices from omniprompt - Option `--enable-browser-output` is now `--show-browser-logs` - Multiple bug fixes ------------------------------------------------------------------------------- googler 2.9 2016-12-18 **NOTICE** - `googler` is on Debian and Ubuntu official releases now. In addition, there's a PPA in place to install the latest program releases from. 
Modifications - Omniprompt option to search exact keywords on auto-correction - Push cmdline arguments to readline history (simplifies editing the keywords) - Added check to ensure UTF-8 encoding - Support 3 HTTP redirections before failing to connect - Support environment variable https_proxy - Python 3.5.3 compliance for TLS 1.2 - Removed deb package generation scripts ------------------------------------------------------------------------------- googler 2.8 2016-10-04 Modifications - Add option --notweak to disable TCP optimizations and forced TLS 1.2. - Limited self-upgrade options to -U or --upgrade. Removed --update. ------------------------------------------------------------------------------- googler v2.7 2016-08-28 Modifications - Show google services abstract with User Agent disabled. - In-place self-upgrade mechanism. - Fix integration with text-based browsers. - Set process title to googler if setproctitle is installed. ------------------------------------------------------------------------------- googler v2.6 2016-07-06 Modifications - Option `--noua` to disable UA (default - enabled). - Logging and auto-completion script changes. ------------------------------------------------------------------------------- googler v2.5.1 2016-06-13 Modifications - Enable TCP/IP optimizations only for Linux. This fails on OS X. NOTE: The optimizations do not work on Linux 2.4 and earlier either. ------------------------------------------------------------------------------- googler v2.5 2016-06-12 **NOTICE:** - Python 2.x support is discontinued. 
- googler is now available on [Debian Sid](https://packages.debian.org/unstable/main/googler) Modifications - Invoking `googler` without search keywords shows omniprompt - Introduced options -h and --help to show program help and exit - Support cookie - Use TLS 1.2 (Python 3.4 and above) - Omniprompt key to unfilter filtered similar results - HTTPS proxy support (non-TLS 1.2 supported) - News time shown in cyan by default - Tons of code, logging and debug improvements (thanks Zhiming) ------------------------------------------------------------------------------- googler v2.4.1 2016-05-22 **NOTICE:** Python 2.x support is deprecated now. While it's still possible to use Python 2.x by editing the shebang, we have found issues with Python 2.x (e.g. readline doesn't work) which don't have a satisfactory solution without impacting other features. Python 2.x support will be completely removed in the next version. Modifications - Sitelinks support - Customizable colours - Context in News results - .deb package for Debian and Ubuntu family - Basic support for terminal emulators having ANSI escape sequence support on Windows - New omniprompt option -f to jump to first results page - New omniprompt key -o to open the current search in browser - Shorter omniprompt - Non-interactive mode to fetch results and exit - JSON output support - A complete re-write of the HTML parser ------------------------------------------------------------------------------- googler v2.3 2016-04-23 Modifications - Google Site Search support (option -w) - Auto-completion scripts for Zsh, Bash and Fish shells - All Google top level domains supported - Show time for news - Integrated omniprompt help - Move to argparse - Additional long options easier to remember - Graceful SIGINT handler - Add version to debug logs AND ... 
- An *awesssome* asciinema recording for the README from Zhiming ------------------------------------------------------------------------------- googler v2.2 2016-03-12 Modifications - Show quotes in text and title - Option to disable automatic spelling correction - User agent identifier added for all requests - Improved concise omniprompt with color inversion to work as a page separator - Set column size to auto when sys.stderr is not a tty - Decode HTTPS response in UTF-8 - Dynamically detect python version using /usr/bin/env - Handle EOF (Ctrl-d) at omniprompt Improvements - Refactored code - Modularized code for repetitive logic - Unnecessary code removal - Dump full HTML response in debug mode - Homebrew integration - Travis integration - A better readme in 100% markdown and ToC with references ------------------------------------------------------------------------------- googler v2.1 2016-02-01 Modifications - Project renamed to googler, same as the utility - Gzip compression to fetch data - Improved continuous search (works without the `g` key at prompt now. Check Example 10 in README for exceptions) - Skip Google News, Images links and ads - Show skipped link count ------------------------------------------------------------------------------- google-cli v2.0 2016-01-09 Modifications - IMPORTANT fix for issue #19: Google replaced "li" with "div" as search result separator. Users must update to this release or latest dev version for google-cli to work. - Handle formatting on Mac OS X in emacs eshell (or any terminal environment where number of columns returned is 0). - PEP 8 style adaptation. Thanks @shaggytwodope! ------------------------------------------------------------------------------- google-cli v1.9 2015-11-13 Modifications - Skip results without any URL (Google custom results like time, define etc.). - Use readline library to support arrow keys in input. - Support installation on OSX. Thanks @ibaaj. 
- Pre-check negative index before attempting to open URL. - Handle exception: "socket.gaierror: [Errno -2] Name or service not known" due to connection throttle on low-bandwidth. - Print correct Exception in case of connection timeout. ------------------------------------------------------------------------------- google-cli v1.8 2015-10-11 Modification - Added timeout to HTTPSConnection() - Redirected stdout and stderr to suppress all warning & error messages when opening results in Firefox ------------------------------------------------------------------------------- google-cli v1.7 2015-10-07 Modification - Added support for redirection and piping - Used stderr instead of stdin to determine console geometry ------------------------------------------------------------------------------- google-cli v1.6 2015-09-12 Modification - Changed incremental search key from s to g keeping in mind that users may use g as the alias for googler. Fix - Handle httplib.BadStatusLine exception. This happens if the connection is closed due to inactivity. Now googler will reconnect and re-issue the search. 
------------------------------------------------------------------------------- google-cli v1.5 2015-09-04 New capabilities - Incremental search support from the same running instance - Utility name changed to googler to avoid any copyright infringements ------------------------------------------------------------------------------- google-cli v1.2 2015-09-03 New capabilities - Open result in browser using index number (thanks jeremija) - Google News support - Time limit search by hours - Country specific search (28 top-level domains added) - Add switch to enable debug logs Removal - Removed file type specific search option -f in favour of filetype:mime Google keyword Fixes - Convert %22 to " (double quote) in URLs - Inputs other than n, p or number (+ Enter) exit - Fix failure to open URL with " (double quotes) in browser - Fix version information in manpage - Get rid of Google Chrome debug/error messages in console when opening URL ------------------------------------------------------------------------------- google-cli v1.1 2015-08-25 New capabilities - Add Python 3.x support - Add UTF-8 request and response [both the contributions are from Narrat] NOTE: The next change in queue is to support opening the URLs in browser. As we can see during preliminary tests, there are several issues around Google Chrome and its mods. This release works as a stable release before we hop on. 
------------------------------------------------------------------------------- google-cli v1.0 2015-08-22 New capabilities - HTTPS support - Navigate as in regular google search - File type in search as an option - Time limited search (day, week, month, year) - Show full text snippet of search results - Unicode in URL support - Honour -j even if -n is not used and open the result in browser - Skip browser to show result in console for empty URL, e.g., first result of 'define hello' - Handle google redirections (error 302) - Throw error in case of google error due to unusual activity from IP Fixes - Adapt to new google HTML response - Fixed character encoding problem in URL e.g. double quotes (%22) changed to %2522 ------------------------------------------------------------------------------- googler-3.5/LICENSE000066400000000000000000001045131324143632700140350ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. 
If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. 
However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. 
Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 
b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. 
b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. 
A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. 
But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: Copyright (C) This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see . The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . googler-3.5/Makefile000066400000000000000000000015301324143632700144630ustar00rootroot00000000000000PREFIX ?= /usr/local BINDIR = $(DESTDIR)$(PREFIX)/bin MANDIR = $(DESTDIR)$(PREFIX)/share/man/man1 DOCDIR = $(DESTDIR)$(PREFIX)/share/doc/googler .PHONY: all install uninstall disable-self-upgrade all: install: install -m755 -d $(BINDIR) install -m755 -d $(MANDIR) install -m755 -d $(DOCDIR) gzip -c googler.1 > googler.1.gz install -m755 googler $(BINDIR) install -m644 googler.1.gz $(MANDIR) install -m644 README.md $(DOCDIR) rm -f googler.1.gz uninstall: rm -f $(BINDIR)/googler rm -f $(MANDIR)/googler.1.gz rm -rf $(DOCDIR) # Disable the self-upgrade mechanism entirely. Intended for packagers. # # We assume that sed(1) has the -i option, which is not POSIX but seems common # enough in modern implementations. disable-self-upgrade: sed -i.bak 's/^ENABLE_SELF_UPGRADE_MECHANISM = True$$/ENABLE_SELF_UPGRADE_MECHANISM = False/' googler googler-3.5/README.md000066400000000000000000000517111324143632700143100ustar00rootroot00000000000000

googler

Latest release AUR Homebrew Debian Stretch+ Ubuntu Yakkety+ Ubuntu PPA License Build Status

Asciicast

`googler` is a power tool to Google (Web & News) and Google Site Search from the command line. It shows the title, URL and abstract for each result, which can be directly opened in a browser from the terminal. Results are fetched in pages (with page navigation). It supports sequential searches in a single `googler` instance. `googler` was initially written to cater to headless servers without X. You can integrate it with a text-based browser. However, it has grown into a very handy and flexible utility that delivers much more. For example, fetch any number of results or start anywhere, limit search by any duration, define aliases to google search any number of websites, switch domains easily... all of this in a very clean interface without ads or stray URLs. The shell completion scripts make sure you don't need to remember any options. `googler` isn't affiliated with Google in any way. *Looking for a similar utility for privacy-aware DuckDuckGo?* Check out [`ddgr`](https://github.com/jarun/ddgr)! *Love smart and efficient terminal utilities? Explore my repositories. Buy me a cup of coffee if they help you.*

Donate via PayPal!

### Table of contents - [Features](#features) - [Installation](#installation) - [Dependencies](#dependencies) - [From a package manager](#from-a-package-manager) - [Tips for packagers](#tips-for-packagers) - [Release packages](#release-packages) - [From source](#from-source) - [Running standalone](#running-standalone) - [Downloading a single file](#downloading-a-single-file) - [Shell completion](#shell-completion) - [Usage](#usage) - [Cmdline options](#cmdline-options) - [Configuration file](#configuration-file) - [googler @t](#googler-t) - [Text-based browser integration](#text-based-browser-integration) - [Terminal Reading Mode or Reader View](#terminal-reading-mode-or-reader-view) - [Colors](#colors) - [Domain-only URL](#domain-only-url) - [Examples](#examples) - [Troubleshooting](#troubleshooting) - [Notes](#notes) - [Contributions](#contributions) - [Developers](#developers) ### Features - Google Search, Google Site Search, Google News - Fast and clean (no ads, stray URLs or clutter), custom color - Navigate result pages from omniprompt, open URLs in browser - Effortless keyword-based site search with googler @t add-on - Search and option completion scripts for Bash, Zsh and Fish - Fetch n results in a go, start at the nth result - Disable automatic spelling correction and search exact keywords - Specify duration, country/domain (default: worldwide/.com), language - Google keywords (e.g. `filetype:mime`, `site:somesite.com`) support - Open the first result directly in browser (as in *I'm Feeling Lucky*) - Non-stop searches: fire new searches at omniprompt without exiting - HTTPS proxy, User Agent, TLS 1.2 (default) support - Comprehensive documentation, man page with handy usage examples - Minimal dependencies ### Installation #### Dependencies `googler` requires Python 3.4 or later. Only the latest patch release of each minor version is supported. 
To copy a URL to the clipboard from the omniprompt, `googler` uses `xsel` on Linux, `pbcopy` (installed by default) on OS X and `clip` (installed by default) on Windows. #### From a package manager - [AUR](https://aur.archlinux.org/packages/googler/) - [Debian](https://packages.debian.org/search?keywords=googler&searchon=names) - [FreeBSD](https://www.freshports.org/www/googler/) (`pkg install googler`) - [Homebrew](http://formulae.brew.sh/formula/googler) - [NixOS](https://github.com/NixOS/nixpkgs/tree/master/pkgs/applications/misc/googler) (`sudo nix-env -i googler`) - [openSUSE](https://software.opensuse.org/package/googler) - [Slackware](http://slackbuilds.org/repository/14.2/network/googler/) - [Ubuntu](https://packages.ubuntu.com/search?keywords=googler&searchon=names) - [Ubuntu PPA](https://launchpad.net/~twodopeshaggy/+archive/ubuntu/jarun/) ##### Tips for packagers `googler` v2.7 and later ship with an in-place self-upgrade mechanism which you may want to disable. To do this, run $ make disable-self-upgrade before installation. #### Release packages Packages for Arch Linux, CentOS, Debian, Fedora, openSUSE and Ubuntu are available with the [latest stable release](https://github.com/jarun/googler/releases/latest). #### From source If you have git installed, clone this repository. Otherwise, download the [latest stable release](https://github.com/jarun/googler/releases/latest) or [development version](https://github.com/jarun/googler/archive/master.zip). To install to the default location (`/usr/local`): $ sudo make install To remove `googler` and associated docs, run $ sudo make uninstall `PREFIX` is supported, in case you want to install to a different location. #### Running standalone `googler` is a standalone executable. From the containing directory: $ ./googler #### Downloading a single file `googler` is a single standalone script, so you could download just a single file if you'd like to.
To install the latest stable version, run $ sudo curl -o /usr/local/bin/googler https://raw.githubusercontent.com/jarun/googler/v3.5/googler && sudo chmod +x /usr/local/bin/googler You could then let googler upgrade itself by running $ sudo googler -u Similarly, if you want to install from git master (*risky*), run $ sudo curl -o /usr/local/bin/googler https://raw.githubusercontent.com/jarun/googler/master/googler && sudo chmod +x /usr/local/bin/googler and upgrade by running $ sudo googler -u --include-git ### Shell completion Search keyword and option completion scripts for Bash, Fish and Zsh can be found in respective subdirectories of [`auto-completion/`](auto-completion). Please refer to your shell's manual for installation instructions. ### Usage #### Cmdline options ``` usage: googler [-h] [-s N] [-n N] [-N] [-c TLD] [-l LANG] [-x] [-C] [--colors COLORS] [-j] [-t dN] [-w SITE] [--unfilter] [-p PROXY] [--noua] [--notweak] [--json] [--url-handler UTIL] [--show-browser-logs] [--np] [-u] [--include-git] [-v] [-d] [KEYWORD [KEYWORD ...]] Google from the command-line. 
positional arguments: KEYWORD search keywords optional arguments: -h, --help show this help message and exit -s N, --start N start at the Nth result -n N, --count N show N results (default 10) -N, --news show results from news section -c TLD, --tld TLD country-specific search with top-level domain .TLD, e.g., 'in' for India -l LANG, --lang LANG display in language LANG -x, --exact disable automatic spelling correction -C, --nocolor disable color output --colors COLORS set output colors (see man page for details) -j, --first, --lucky open the first result in web browser and exit -t dN, --time dN time limit search [h5 (5 hrs), d5 (5 days), w5 (5 weeks), m5 (5 months), y5 (5 years)] -w SITE, --site SITE search a site using Google --unfilter do not omit similar results -p PROXY, --proxy PROXY tunnel traffic through an HTTP proxy; PROXY is of the form [http://][user:password@]proxyhost[:port] --noua disable user agent --notweak disable TCP optimizations and forced TLS 1.2 --json output in JSON format; implies --noprompt --url-handler UTIL custom script or cli utility to open results --show-browser-logs do not suppress browser output (stdout and stderr) --np, --noprompt search and exit, do not prompt -u, --upgrade perform in-place self-upgrade --include-git when used with --upgrade, upgrade to latest git master -v, --version show program's version number and exit -d, --debug enable debugging omniprompt keys: n, p fetch the next or previous set of search results index open the result corresponding to index in browser f jump to the first page o [index|range|a ...] open space-separated result indices, numeric ranges (sitelinks unsupported in ranges), or all, in browser open the current search in browser, if no arguments O [index|range|a ...] 
like key 'o', but try to open in a GUI browser g keywords new Google search for 'keywords' with original options should be used to search omniprompt keys and indices c index copy url to clipboard u toggle url expansion q, ^D, double Enter exit googler ? show omniprompt help * other inputs issue a new search with original options ``` #### Configuration file `googler` doesn't have any! This is to retain the speed of the utility and avoid OS-specific differences. Users can enjoy the advantages of config files using aliases (with the exception of the color scheme, which can be additionally customized through an environment variable; see [Colors](#colors)). There's no need to memorize options. For example, the following alias for bash/zsh/ksh/etc. alias g='googler -n 7 -c ru -l ru' fetches 7 results from the Google Russia server, with preference towards results in Russian. The alias serves both the purposes of using config files: - Persistent settings: when the user invokes `g`, it expands to the preferred settings. - Override settings: thanks to the way Python `argparse` works, `googler` is written so that the settings in alias are completely overridden by any options passed from cli. So when the same user runs `g -l de -c de -n 12 hello world`, 12 results are returned from the Google Germany server, with preference towards results in German. #### googler @t `googler @t` is a convenient add-on to Google Site Search with unique keywords. While `googler` has an integrated option to search a site, we simplified it further with aliases. The file [googler_at](https://github.com/jarun/googler/blob/master/auto-completion/googler_at/googler_at) contains a list of website search aliases. To source it, run: $ source googler_at or, $ . googler_at With `googler @t`, here's how you search Wikipedia for `hexspeak`: $ @w hexspeak Oh yes! You can combine other `googler` options too! To make life easier, you can also configure your shell to source the file when it starts. 
All the aliases start with the `@` symbol (hence the name `googler @t`) and there is minimum chance they will conflict with any shell commands. Feel free to add your own aliases to the file and contribute back the interesting ones. #### Text-based browser integration `googler` works out of the box with several text-based browsers if the `BROWSER` environment variable is set. For instance, $ export BROWSER=w3m or for one-time use, $ BROWSER=w3m googler query Due to certain graphical browsers spewing messages to the console, `googler` suppresses browser output by default unless `BROWSER` is set to one of the known text-based browsers: currently `elinks`, `links`, `lynx`, `w3m` or `www-browser`. If you use a different text-based browser, you will need to explicitly enable browser output with the `--show-browser-logs` option. If you believe your browser is popular enough, please submit an issue or pull request and we will consider whitelisting it. See the man page for more details on `--show-browser-logs`. If you need to use a GUI browser with `BROWSER` set, use the omniprompt key `O`. `googler` will try to ignore text-based browsers and invoke a GUI browser. Browser logs are always suppressed with `O`. #### Terminal Reading Mode or Reader View You can easily integrate `googler` with some equally awesome utilities to enjoy your daily dose of news in a completely distraction-free environment. Read more [in the wiki](https://github.com/jarun/googler/wiki/Terminal-Reading-Mode-or-Reader-View). #### Colors `googler` allows you to customize the color scheme via a six-letter string, reminiscent of BSD `LSCOLORS`. The six letters represent the colors of - indices - titles - URLs - metadata/publishing info (Google News only) - abstracts - prompts respectively. The six-letter string is passed in either as the argument to the `--colors` option, or as the value of the environment variable `GOOGLER_COLORS`. 
We offer the following colors/styles: Letter | Color/Style ------ | ----------- a | black b | red c | green d | yellow e | blue f | magenta g | cyan h | white i | bright black j | bright red k | bright green l | bright yellow m | bright blue n | bright magenta o | bright cyan p | bright white A-H | bold version of the lowercase-letter color I-P | bold version of the lowercase-letter bright color x | normal X | bold y | reverse video Y | bold reverse video The default colors string is `GKlgxy`, which stands for - bold cyan indices - bold bright green titles - bright yellow URLs - cyan metadata/publishing info - normal abstracts - reverse video prompts Note that - Bright colors (implemented as `\x1b[90m`–`\x1b[97m`) may not be available in all color-capable terminal emulators; - Some terminal emulators draw bold text in bright colors instead; - Some terminal emulators only distinguish between bold and bright colors via a default-off switch. Please consult the manual of your terminal emulator as well as the [Wikipedia article](https://en.wikipedia.org/wiki/ANSI_escape_code) on ANSI escape sequences. #### Domain-only URL To show the domain names in search results instead of the expanded URL (and use less space), set the environment variable `DISABLE_URL_EXPANSION`. ### Examples 1. Google **hello world**: $ googler hello world 2. Fetch **15 results** updated within the last **14 months**, starting from the **3rd result** for the keywords **jungle book** in **site** imdb.com: $ googler -n 15 -s 3 -t m14 -w imdb.com jungle book 3. Read recent **news** on gadgets: $ googler -N gadgets 4. Fetch results on IPL cricket from **Google India** server in **English**: $ googler -c in -l en IPL cricket 5. Search **quoted text**: $ googler it\'s a \"beautiful world\" in spring 6. Search for a **specific file type**: $ googler instrumental filetype:mp3 7. Disable **automatic spelling correction**, e.g. fetch results for `googler` instead of `google`: $ googler -x googler 8.
**I'm feeling lucky** search: $ googler -j leather jackets 9. **Website-specific** search: $ googler -w amazon.com -w ebay.com digital camera Site-specific search continues at omniprompt. 10. Alias to find **definitions of words**: alias define='googler -n 2 define' 11. Look up `n`, `p`, `o`, `O`, `q`, `g keywords` or a result index at the **omniprompt**: as the omniprompt recognizes these keys or index strings as commands, you need to prefix them with `g`, e.g., g n g g keywords g 1 12. Input and output **redirection**: $ googler -C hello world < input > output Note that `-C` is required to avoid printing control characters (for colored output). 13. **Pipe** output: $ googler -C hello world | tee output 14. Use a **custom color scheme**, e.g., a warm color scheme designed for Solarized Dark ([screenshot](https://i.imgur.com/6L8VlfS.png)): $ googler --colors bjdxxy google $ GOOGLER_COLORS=bjdxxy googler google 15. Tunnel traffic through an **HTTPS proxy**, e.g., a local Privoxy instance listening on port 8118: $ googler --proxy localhost:8118 google By default the environment variable `https_proxy` is used, if defined. 16. Quote multiple search keywords to auto-complete (using completion script): $ googler 'hello w 17. More **help**: $ googler -h $ man googler ### Troubleshooting 1. In some instances `googler` may show fewer results than you expect, e.g., if you fetch a single result (`-n 1`) it may not show any results. The reason is that Google shows some Google service (e.g. YouTube) results, map locations etc. depending on your geographical data, which `googler` tries to omit. In some cases Google (the web-service) doesn't show exactly 10 results (default) on a search. We chose to omit these results as far as possible. While this can be fixed, it would need more processing (and more time). You can just navigate forward to fetch the next set of results. 2. By default `googler` applies some TCP optimizations and forces TLS 1.2 (on Python 3.4 and above).
If you are facing connection issues, try disabling both using the `--notweak` switch. 3. Google News service is not available if the language is `dk` (Denmark), `fi` (Finland) or `is` (Iceland). Use `-l en`. Please refer to #187 for more information. 4. Some users have reported problems with a colored omniprompt (refer to issue [#203](https://github.com/jarun/googler/issues/203)) with iTerm2 on OS X. To force a plain omniprompt: export DISABLE_PROMPT_COLOR=1 ### Notes 1. Initially I raised a pull request but I could see that the last change was made 7 years earlier. In addition, there is no GitHub activity from the original author [Henri Hakkinen](https://github.com/henux) in the past year. I have created this independent repo for the project with the name `googler`. I retained the original copyright information. 2. Google provides a search API which returns the results in JSON format. However, as per my understanding from the [official docs](https://developers.google.com/custom-search/json-api/v1/overview), the API issues the queries against an existing instance of a custom search engine and is limited to 100 search queries per day for free. In addition, I have reservations about paying if they ever change their plan or restrict the API in other ways. So I refrained from coupling with Google plans & policies or exposing my trackable personal custom search API key and identifier for the public. I retained the browser-way of doing it by fetching html, which is an open and free specification. 3. You can find a rofi script for `googler` [here](http://hastebin.com/fonowacija.bash). Written by an anonymous user, untested and we don't maintain it. ### Contributions Pull requests are welcome. Please visit [#209](https://github.com/jarun/googler/issues/209) for a list of TODOs.

gitter chat

### Developers 1. Copyright © 2008 Henri Hakkinen 2. Copyright © 2015-2018 [Arun Prakash Jana](https://github.com/jarun) 3. [Zhiming Wang](https://github.com/zmwangx) 4. [Johnathan Jenkins](https://github.com/shaggytwodope) 5. [SZ Lin](https://github.com/szlin) Special thanks to [jeremija](https://github.com/jeremija) and [Narrat](https://github.com/Narrat) for their contributions. ### Logo Logo copyright © 2017 Zhiming Wang. You may freely redistribute it alongside the code, or use it when describing or linking to this project. You should NOT create modified versions of it, make it the logo or icon of your project (except personal forks and/or forks with the goal of upstreaming), or otherwise use it without written permission. googler-3.5/auto-completion/000077500000000000000000000000001324143632700161435ustar00rootroot00000000000000googler-3.5/auto-completion/bash/000077500000000000000000000000001324143632700170605ustar00rootroot00000000000000googler-3.5/auto-completion/bash/googler-completion.bash000066400000000000000000000030351324143632700235250ustar00rootroot00000000000000# # Rudimentary Bash completion definition for googler. # # Author: # Zhiming Wang # _googler () { COMPREPLY=() local IFS=$' \n' local cur=$2 prev=$3 local -a opts opts_with_args opts=( -h --help -s --start -n --count -N --news -c --tld -l --lang -x --exact -C --nocolor --colors -j --first --lucky -t --time -w --site --unfilter -p --proxy --noua --notweak --json --url-handler --show-browser-logs --np --noprompt -u --upgrade --include-git -v --version -d --debug ) opts_with_arg=( -s --start -n --count -c --tld -l --lang --colors -t --time -w --site -p --proxy --url-handler ) if [[ $cur == -* ]]; then # The current argument is an option -- complete option names. COMPREPLY=( $(compgen -W "${opts[*]}" -- "$cur") ) else # Do not complete option arguments; only autocomplete positional # arguments (queries). 
for opt in "${opts_with_arg[@]}"; do [[ $opt == $prev ]] && return 1 done local completion COMPREPLY=() while IFS= read -r completion; do # Quote spaces for `complete -W wordlist` COMPREPLY+=( "${completion// /\\ }" ) done < <(googler --complete "$cur") fi return 0 } complete -F _googler googler googler-3.5/auto-completion/fish/000077500000000000000000000000001324143632700170745ustar00rootroot00000000000000googler-3.5/auto-completion/fish/googler.fish000066400000000000000000000050271324143632700214110ustar00rootroot00000000000000# # Fish completion definition for googler. # # Author: # Arun Prakash Jana # function __fish_googler_non_option_argument not string match -- "-*" (commandline -ct) end function __fish_googler_complete_query googler --complete (commandline -ct) ^/dev/null end complete -c googler -s h -l help --description 'show help text and exit' complete -c googler -s s -l start -r --description 'start at the Nth result' complete -c googler -s n -l count -r --description 'show specified number of results (default 10)' complete -c googler -s N -l news --description 'show results from news section' complete -c googler -s c -l tld -r --description 'country-specific search with top-level domain' complete -c googler -s l -l lang -r --description 'display in specified language' complete -c googler -s x -l exact --description 'disable automatic spelling correction' complete -c googler -s C -l nocolor --description 'disable color output' complete -c googler -l colors -r --description 'set output colors' complete -c googler -s j -l first -l lucky --description 'open the first result in a web browser' complete -c googler -s t -l time -r --description 'time limit search (h/d/w/m/y + number)' complete -c googler -s w -l site -r --description 'search a site using Google' complete -c googler -l unfilter --description 'do not omit similar results' complete -c googler -s p -l proxy -r --description 'proxy in HOST:PORT format' complete -c googler -l noua --description 
'disable user agent' complete -c googler -l notweak --description 'disable TCP optimizations, forced TLS 1.2' complete -c googler -l json --description 'output in JSON format' complete -c googler -l url-handler -r --description 'cli script or utility' complete -c googler -l show-browser-logs --description 'do not suppress browser output' complete -c googler -l np -l noprompt --description 'perform search and exit' complete -c googler -s u -l upgrade --description 'perform in-place self-upgrade' complete -c googler -l include-git --description 'use git master for --upgrade' complete -c googler -s v -l version --description 'show version number and exit' complete -c googler -s d -l debug --description 'enable debugging' complete -c googler -n __fish_googler_non_option_argument -a '(__fish_googler_complete_query)' googler-3.5/auto-completion/googler_at/000077500000000000000000000000001324143632700202655ustar00rootroot00000000000000googler-3.5/auto-completion/googler_at/googler_at000066400000000000000000000164331324143632700223410ustar00rootroot00000000000000# googler @t alias list # Author: Arun Prakash Jana # email: engineerarun@gmail.com # # To request key addition or removal upstream, please drop an email. 
# Site-search shortcuts, grouped alphabetically by key.
# Each `@key` alias runs googler restricted to one site through `-w <domain>`;
# `@g` is plain googler and `@n` is the news search (`-N`).

# A
# Amazon.com
alias @a='googler -w amazon.com'
# AlternativeTo
alias @alt='googler -w alternativeto.net'
# Android Developers
alias @android='googler -w developer.android.com'
# ARM Information Center
alias @arm='googler -w infocenter.arm.com'
# asciinema
alias @asciinema='googler -w asciinema.org'
# Ask Ubuntu
alias @askubuntu='googler -w askubuntu.com'
# Arch User Repository
alias @aur='googler -w aur.archlinux.org'
# Arch Wiki
alias @aw='googler -w wiki.archlinux.org'
# AZLyrics
alias @azl='googler -w azlyrics.com'

# B
# BBC
alias @bbc='googler -w bbc.co.uk'
# Encyclopaedia Britannica
alias @britannica='googler -w britannica.com'

# C
# crunchbase
alias @cb='googler -w crunchbase.com'
# Chrome Extensions
alias @chrome='googler -w chrome.google.com'
# craigslist
alias @cl='googler -w craigslist.org'
# commandlinefu
alias @cmd='googler -w commandlinefu.com'
# CNN
alias @cnn='googler -w cnn.com'
# Comedy Central
alias @comedy='googler -w cc.com'
# CPP Reference
alias @cpp='googler -w en.cppreference.com'
# Cracked.com
alias @cracked='googler -w cracked.com'
# Cricinfo
alias @cricinfo='googler -w espncricinfo.com'

# D
# The Free Dictionary
alias @d='googler -w thefreedictionary.com'
# Dictionary.com
alias @di='googler -w dictionary.com'
# DistroWatch
alias @distrowatch='googler -w distrowatch.com'
# Debian Package Search
alias @dpkg='googler -w packages.debian.org'

# E
# eBay
alias @e='googler -w ebay.com'
# Episode Guides
alias @eg='googler -w epguides.com'
# Embedded
alias @embedded='googler -w embedded.com'
# ESPN
alias @espn='googler -w espn.com'
# Etsy
alias @etsy='googler -w etsy.com'
# Online Etymology Dictionary
alias @etym='googler -w etymonline.com'

# F
# Facebook
alias @fb='googler -w facebook.com'
# Fandango Movie Reviews
alias @fd='googler -w fandango.com'
# Firefox Add-ons
alias @firefox='googler -w addons.mozilla.org'
# Flipkart
alias @fk='googler -w flipkart.com'
# Forbes
alias @forbes='googler -w forbes.com'
# Forvo
alias @forvo='googler -w forvo.com'
# Financial Times
alias @ft='googler -w markets.ft.com'

# G
# Google Search
alias @g='googler'
# Genius Lyrics
alias @genius='googler -w genius.com'
# GitHub
alias @gh='googler -w github.com'
# GNU
alias @gnu='googler -w gnu.org'
# Goal
alias @goal='googler -w goal.com'
# Goear Music
alias @goear='googler -w goear.com'
# The GNU Privacy Guard
alias @gpg='googler -w gnupg.org'
# Project Gutenberg
alias @gutenberg='googler -w gutenberg.org'

# H
# Hackaday
alias @had='googler -w hackaday.com'
# History
alias @history='googler -w history.com'
# Hacker News
alias @hn='googler -w news.ycombinator.com'
# HowStuffWorks
alias @hsw='googler -w howstuffworks.com'
# HowtoForge
alias @htf='googler -w howtoforge.com'
# Hulu
alias @hulu='googler -w hulu.com'

# I
# IEEE
alias @ieee='googler -w ieee.org'
# IETF
alias @ietf='googler -w ietf.org'
# IETF Datatracker
alias @ietfd='googler -w datatracker.ietf.org'
# Instagram
alias @ig='googler -w instagram.com'
# IMDB
alias @imdb='googler -w imdb.com'
# Internet Radio
alias @iradio='googler -w internet-radio.com'

# J

# K
# The Linux Kernel Archives
alias @kernel='googler -w kernel.org'
# Khan Academy
alias @khan='googler -w khanacademy.org'

# L
# Last.fm
alias @lfm='googler -w last.fm'
# LinkedIn
alias @li='googler -w linkedin.com'
# Linux.com
alias @linux='googler -w linux.com'
# Linux Journal
alias @lj='googler -w linuxjournal.com'
# LinuxQuestions
alias @lq='googler -w linuxquestions.org'
# LQWiki
alias @lqw='googler -w wiki.linuxquestions.org'
# LWN.net
alias @lwn='googler -w lwn.net'
# Linux Cross Reference
alias @lxr='googler -w lxr.free-electrons.com'

# M
# Ubuntu Manpage
alias @man='googler -w manpages.ubuntu.com'
# Linux manual page
alias @man7='googler -w man7.org'
# Manga Reader
alias @mangar='googler -w mangareader.net'
# MLB
alias @mlb='googler -w mlb.mlb.com'
# Mac Rumors
alias @mr='googler -w macrumors.com'

# N
# Google News
alias @n='googler -N'
# Nature Research
# (site search needs -w; -N is the news switch and would treat the domain
# as a plain keyword)
alias @nature='googler -w nature.com'
# NBA
alias @nba='googler -N nba.com' # National Geographic alias @ng='googler -N nationalgeographic.com' # National Programme on Technology Enhanced Learning alias @nptel='googler -w nptel.ac.in' # O # MIT OpenCourseWare alias @ocw='googler -w ocw.mit.edu' # Open Embedded alias @oembedded='googler -w openembedded.org' # OMG! Ubuntu! alias @omg='googler -w omgubuntu.co.uk' # OpenSubtitles alias @op='googler -w opensubtitles.org' # Opensource.com alias @opensource='googler -w opensource.com' # Open Source Alternative alias @osalt='googler -w osalt.com' # OSDev Wiki alias @osdev='googler -w wiki.osdev.org' # OpenWrt alias @owrt='googler -w openwrt.org' # Oxford Dictionary alias @ox='googler -w en.oxforddictionaries.com' # P # Google Patents alias @patent='googler -w patents.google.com' # The Pirate Bay alias @pirate='googler -w thepiratebay.org' # Android Apps alias @play='googler -w play.google.com' # PlayOnLinux alias @playonlinux='googler -w playonlinux.com' # Python documentation alias @python='googler -w docs.python.org' # Q # Quora alias @q='googler -w quora.com' # Wikiquote alias @quotes='googler -w en.wikiquote.org' # R # Reddit alias @r='googler -w reddit.com' # Reader's Digest alias @rd='googler -w rd.com' # RFC Reader alias @rfc='googler -w rfc-editor.org' # Rpmfind alias @rpm='googler -w rpmfind.net' # Rotten Tomatoes alias @rt='googler -w rottentomatoes.com' # S # OnlineSlangDictionary alias @slang='googler -w onlineslangdictionary.com' # Stack Overflow alias @so='googler -w stackoverflow.com' # Softpedia alias @softpedia='googler -w softpedia.com' # SurceForge alias @sourceforge='googler -w sourceforge.net' # Subscene alias @ss='googler -w subscene.com' # Steam alias @st='googler -w store.steampowered.com' # T # Thesaurus.com alias @t='googler -w thesaurus.com' # TED Talks alias @ted='googler -w ted.com' # The Linux Documentation Project alias @tldp='googler -w tldp.org' # tl;drLegal alias @tldrlegal='googler -w tldrlegal.com' # Torrentz2 alias @to='googler 
-w torrentz2.eu' # The Pirate Bay alias @tpb='googler -w thepiratebay.org' # TuneIn alias @tunein='googler -w tunein.com' # Twitter alias @tw='googler -w twitter.com' # Twitch alias @twitch='googler -w twitch.tv' # U # Ubuntu Forums alias @ubuntuforums='googler -w ubuntuforums.org' # Ubuntu Packages alias @ubuntupackages='googler -w packages.ubuntu.com' # Ubuntu Wiki alias @uwiki='googler -w wiki.ubuntu.com' # V # Vim Wiki alias @vim='googler -w vim.org' # W # Wikipedia alias @w='googler -w en.wikipedia.org' # Walmart alias @walmart='googler -w walmart.com' # Weather.com alias @weather='googler -w weather.com' # Wikia alias @wikia='googler -w wikia.com' # X # XKCD alias @xkcd='googler -w xkcd.com' # Y # Yahoo alias @y='googler -w yahoo.com' # Yahoo Finance alias @yf='googler -w finance.yahoo.com' # YouTube alias @yt='googler -w youtube.com' # Z # ZDNet alias @zdnet='googler -w zdnet.com' googler-3.5/auto-completion/zsh/000077500000000000000000000000001324143632700167475ustar00rootroot00000000000000googler-3.5/auto-completion/zsh/_googler000066400000000000000000000051641324143632700204750ustar00rootroot00000000000000#compdef googler # # Completion definition for googler. # # Author: # Zhiming Wang # setopt localoptions noshwordsplit noksharrays _googler_query_caching_policy () { # rebuild if cache is more than a day old local -a oldp oldp=( $1(Nm+1) ) (( $#oldp )) } _googler_complete_query () { local prefix=$words[CURRENT] [[ -n $prefix && $prefix != -* ]] || return local cache_id=googler_$prefix zstyle -s :completion:${curcontext}: cache-policy update_policy [[ -z $update_policy ]] && zstyle :completion:${curcontext}: cache_policy _googler_query_caching_policy local -a completions if _cache_invalid $cache_id || ! 
_retrieve_cache $cache_id; then completions=( ${(f)"$(googler --complete $prefix 2>/dev/null)"} ) _store_cache $cache_id completions fi compadd $@ -- $completions } local -a args args=( '(- : *)'{-h,--help}'[show help text and exit]' '(-s --start)'{-s,--start}'[start at the Nth result]:result number' '(-n --count)'{-n,--count}'[show specified number of results (default 10)]:count' '(-N --news)'{-N,--news}'[show results from news section]' '(-c --tld)'{-c,--tld}'[country-specific search with top-level domain]:top level domain without dot' '(-l --lang)'{-l,--lang}'[display in specified language]:language code' '(-x --exact)'{-x,--exact}'[disable automatic spelling correction]' '(-C --nocolor)'{-C,--nocolor}'[disable color output]' '(--colors)--colors[set output colors]:six-letter string' '(-j --first --lucky)'{-j,--first,--lucky}'[open the first result in a web browser]' '(-t --time)'{-t,--time}'[time limit search]:period (h/d/w/m/y + number)' '(-w --site)'{-w,--site}'[search a site using Google]:domain' '(--unfilter)--unfilter[do not omit similar results]' '(-p --proxy)'{-p,--proxy}'[proxy in HOST:PORT format]:proxy details' '(--noua)--noua[disable user agent]' '(--notweak)--notweak[disable TCP optimizations, forced TLS 1.2]' '(--json)--json[output in JSON format; implies --exact and --noprompt]' '(--url-handler)--url-handler[cli script or utility]:url opener' '(--show-browser-logs)--show-browser-logs[do not suppress browser output]' '(--np --noprompt)'{--np,--noprompt}'[perform search and exit, do not prompt for further interactions]' '(-u --upgrade)'{-u,--upgrade}'[perform in-place self-upgrade]' '(--include-git)--include-git[when used with --upgrade, upgrade to git master]' '(- : *)'{-v,--version}'[show version number and exit]' '(-d --debug)'{-d,--debug}'[enable debugging]' '*:::query:_googler_complete_query' ) _arguments -S -s $args googler-3.5/googler000077500000000000000000002625461324143632700144270ustar00rootroot00000000000000#!/usr/bin/env python3 # # 
Copyright © 2008 Henri Hakkinen # Copyright © 2015-2018 Arun Prakash Jana # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . import argparse import atexit import base64 import collections import codecs import functools import gzip import html.entities import html.parser import http.client from http.client import HTTPSConnection import locale import logging import os import shutil import signal import socket import ssl from subprocess import Popen, PIPE, DEVNULL import sys import textwrap import urllib.parse import webbrowser # Python optional dependency compatibility layer try: import readline except ImportError: pass # Basic setup try: import setproctitle setproctitle.setproctitle('googler') except Exception: pass logging.basicConfig(format='[%(levelname)s] %(message)s') logger = logging.getLogger() def sigint_handler(signum, frame): print('\nInterrupted.', file=sys.stderr) sys.exit(1) signal.signal(signal.SIGINT, sigint_handler) # Constants _VERSION_ = '3.5' COLORMAP = {k: '\x1b[%sm' % v for k, v in { 'a': '30', 'b': '31', 'c': '32', 'd': '33', 'e': '34', 'f': '35', 'g': '36', 'h': '37', 'i': '90', 'j': '91', 'k': '92', 'l': '93', 'm': '94', 'n': '95', 'o': '96', 'p': '97', 'A': '30;1', 'B': '31;1', 'C': '32;1', 'D': '33;1', 'E': '34;1', 'F': '35;1', 'G': '36;1', 'H': '37;1', 'I': '90;1', 'J': '91;1', 'K': '92;1', 'L': '93;1', 'M': '94;1', 'N': '95;1', 'O': '96;1', 'P': '97;1', 'x': '0', 'X': '1', 'y': '7', 'Y': '7;1', 
}.items()} # Disguise as Firefox on Ubuntu USER_AGENT = ('Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:58.0) Gecko/20100101 Firefox/58.0') ua = True # User Agent is enabled by default text_browsers = ['elinks', 'links', 'lynx', 'w3m', 'www-browser'] # Self-upgrade parameters # # Downstream packagers are recommended to turn off the entire self-upgrade # mechanism through # # make disable-self-upgrade # # before running `make install'. ENABLE_SELF_UPGRADE_MECHANISM = True API_REPO_BASE = 'https://api.github.com/repos/jarun/googler' RAW_DOWNLOAD_REPO_BASE = 'https://raw.githubusercontent.com/jarun/googler' # Global helper functions def open_url(url): """Open an URL in the user's default web browser. The string attribute ``open_url.url_handler`` can be used to open URLs in a custom CLI script or utility. A subprocess is spawned with url as the parameter in this case instead of the usual webbrowser.open() call. Whether the browser's output (both stdout and stderr) are suppressed depends on the boolean attribute ``open_url.suppress_browser_output``. If the attribute is not set upon a call, set it to a default value, which means False if BROWSER is set to a known text-based browser -- elinks, links, lynx, w3m or 'www-browser'; or True otherwise. The string attribute ``open_url.override_text_browser`` can be used to ignore env var BROWSER as well as some known text-based browsers and attempt to open url in a GUI browser available. 
Note: If a GUI browser is indeed found, this option ignores the program option `show-browser-logs` """ logger.debug('Opening %s', url) # Custom URL handler gets max priority if hasattr(open_url, 'url_handler'): p = Popen([open_url.url_handler, url], stdin=PIPE) p.communicate() return browser = webbrowser.get() if open_url.override_text_browser: browser_output = open_url.suppress_browser_output for name in [b for b in webbrowser._tryorder if b not in text_browsers]: browser = webbrowser.get(name) logger.debug(browser) # Found a GUI browser, suppress browser output open_url.suppress_browser_output = True break if open_url.suppress_browser_output: _stderr = os.dup(2) os.close(2) _stdout = os.dup(1) os.close(1) fd = os.open(os.devnull, os.O_RDWR) os.dup2(fd, 2) os.dup2(fd, 1) try: browser.open(url, new=2) finally: if open_url.suppress_browser_output: os.close(fd) os.dup2(_stderr, 2) os.dup2(_stdout, 1) if open_url.override_text_browser: open_url.suppress_browser_output = browser_output def printerr(msg): """Print message, verbatim, to stderr. ``msg`` could be any stringifiable value. """ print(msg, file=sys.stderr) def unwrap(text): """Unwrap text.""" lines = text.split('\n') result = '' for i in range(len(lines) - 1): result += lines[i] if not lines[i]: # Paragraph break result += '\n\n' elif lines[i + 1]: # Next line is not paragraph break, add space result += ' ' # Handle last line result += lines[-1] if lines[-1] else '\n' return result def check_stdout_encoding(): """Make sure stdout encoding is utf-8. If not, print error message and instructions, then exit with status 1. This function is a no-op on win32 because encoding on win32 is messy, and let's just hope for the best. 
/s """ if sys.platform == 'win32': return # Use codecs.lookup to resolve text encoding alias encoding = codecs.lookup(sys.stdout.encoding).name if encoding != 'utf-8': locale_lang, locale_encoding = locale.getlocale() if locale_lang is None: locale_lang = '' if locale_encoding is None: locale_encoding = '' ioencoding = os.getenv('PYTHONIOENCODING', 'not set') sys.stderr.write(unwrap(textwrap.dedent("""\ stdout encoding '{encoding}' detected. googler requires utf-8 to work properly. The wrong encoding may be due to a non-UTF-8 locale or an improper PYTHONIOENCODING. (For the record, your locale language is {locale_lang} and locale encoding is {locale_encoding}; your PYTHONIOENCODING is {ioencoding}.) Please set a UTF-8 locale (e.g., en_US.UTF-8) or set PYTHONIOENCODING to utf-8. """.format( encoding=encoding, locale_lang=locale_lang, locale_encoding=locale_encoding, ioencoding=ioencoding, )))) sys.exit(1) # Classes class TLS1_2Connection(HTTPSConnection): """Overrides HTTPSConnection.connect to specify TLS version NOTE: TLS 1.2 is supported from Python 3.4 """ def __init__(self, host, **kwargs): HTTPSConnection.__init__(self, host, **kwargs) def connect(self, notweak=False): sock = socket.create_connection((self.host, self.port), self.timeout, self.source_address) # Optimizations not available on OS X if not notweak and sys.platform.startswith('linux'): try: sock.setsockopt(socket.SOL_TCP, socket.TCP_DEFER_ACCEPT, 1) sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_QUICKACK, 1) sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 524288) except OSError: # Doesn't work on Windows' Linux subsystem (#179) logger.debug('setsockopt failed') if getattr(self, '_tunnel_host', None): self.sock = sock elif not notweak: # Try to use TLS 1.2 ssl_context = None if hasattr(ssl, 'PROTOCOL_TLS'): # Since Python 3.5.3 ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS) ssl_context.options |= (ssl.OP_NO_SSLv2 | ssl.OP_NO_SSLv3 | ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1) elif hasattr(ssl, 
'PROTOCOL_TLSv1_2'): # Since Python 3.4 ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLSv1_2) if ssl_context: self.sock = ssl_context.wrap_socket(sock) return # Fallback HTTPSConnection.connect(self) class GoogleUrl(object): """ This class constructs the Google Search/News URL. This class is modelled on urllib.parse.ParseResult for familiarity, which means it supports reading of all six attributes -- scheme, netloc, path, params, query, fragment -- of urllib.parse.ParseResult, as well as the geturl() method. However, the attributes (properties) and methods listed below should be the preferred methods of access to this class. Parameters ---------- opts : dict or argparse.Namespace, optional See the ``opts`` parameter of `update`. Other Parameters ---------------- See "Other Parameters" of `update`. Attributes ---------- hostname : str Read-write property. keywords : str or list of strs Read-write property. news : bool Read-only property. url : str Read-only property. Methods ------- full() relative() update(opts=None, **kwargs) set_queries(**kwargs) unset_queries(*args) next_page() prev_page() first_page() """ def __init__(self, opts=None, **kwargs): self.scheme = 'https' # self.netloc is a calculated property self.path = '/search' self.params = '' # self.query is a calculated property self.fragment = '' self._tld = None self._num = 10 self._start = 0 self._keywords = [] self._sites = None self._query_dict = { 'ie': 'UTF-8', 'oe': 'UTF-8', } self.update(opts, **kwargs) def __str__(self): return self.url @property def url(self): """The full Google URL you want.""" return self.full() @property def hostname(self): """The hostname.""" return self.netloc @hostname.setter def hostname(self, hostname): self.netloc = hostname @property def keywords(self): """The keywords, either a str or a list of strs.""" return self._keywords @keywords.setter def keywords(self, keywords): self._keywords = keywords @property def news(self): """Whether the URL is for Google News.""" return 
'tbm' in self._query_dict and self._query_dict['tbm'] == 'nws' def full(self): """Return the full URL. Returns ------- str """ url = (self.scheme + ':') if self.scheme else '' url += '//' + self.netloc + self.relative() return url def relative(self): """Return the relative URL (without scheme and authority). Authority (see RFC 3986 section 3.2), or netloc in the terminology of urllib.parse, basically means the hostname here. The relative URL is good for making HTTP(S) requests to a known host. Returns ------- str """ rel = self.path if self.params: rel += ';' + self.params if self.query: rel += '?' + self.query if self.fragment: rel += '#' + self.fragment return rel def update(self, opts=None, **kwargs): """Update the URL with the given options. Parameters ---------- opts : dict or argparse.Namespace, optional Carries options that affect the Google Search/News URL. The list of currently recognized option keys with expected value types: duration: str (GooglerArgumentParser.is_duration) exact: bool keywords: str or list of strs lang: str news: bool num: int site: str start: int tld: str unfilter: bool Other Parameters ---------------- kwargs The `kwargs` dict extends `opts`, that is, options can be specified either way, in `opts` or as individual keyword arguments. 
""" if opts is None: opts = {} if hasattr(opts, '__dict__'): opts = opts.__dict__ opts.update(kwargs) qd = self._query_dict if 'duration' in opts and opts['duration']: qd['tbs'] = 'qdr:%s' % opts['duration'] if 'exact' in opts: if opts['exact']: qd['nfpr'] = 1 else: qd.pop('nfpr', None) if 'keywords' in opts: self._keywords = opts['keywords'] if 'lang' in opts and opts['lang']: qd['hl'] = opts['lang'] if 'news' in opts: if opts['news']: qd['tbm'] = 'nws' else: qd.pop('tbm', None) if 'num' in opts: self._num = opts['num'] if 'sites' in opts: self._sites = opts['sites'] if 'start' in opts: self._start = opts['start'] if 'tld' in opts: self._tld = opts['tld'] if 'unfilter' in opts and opts['unfilter']: qd['filter'] = 0 def set_queries(self, **kwargs): """Forcefully set queries outside the normal `update` mechanism. Other Parameters ---------------- kwargs Arbitrary key value pairs to be set in the query string. All keys and values should be stringifiable. Note that certain keys, e.g., ``q``, have their values constructed on the fly, so setting those has no actual effect. """ for k, v in kwargs.items(): self._query_dict[k] = v def unset_queries(self, *args): """Forcefully unset queries outside the normal `update` mechanism. Other Parameters ---------------- args Arbitrary keys to be unset. No exception is raised if a key does not exist in the first place. Note that certain keys, e.g., ``q``, are always included in the resulting URL, so unsetting those has no actual effect. """ for k in args: self._query_dict.pop(k, None) def next_page(self): """Navigate to the next page.""" self._start += self._num def prev_page(self): """Navigate to the previous page. Raises ------ ValueError If already at the first page (``start=0`` in the current query string). """ if self._start == 0: raise ValueError('Already at the first page.') self._start = (self._start - self._num) if self._start > self._num else 0 def first_page(self): """Navigate to the first page. 
Raises ------ ValueError If already at the first page (``start=0`` in the current query string). """ if self._start == 0: raise ValueError('Already at the first page.') self._start = 0 # Data source: https://web.archive.org/web/20170615200243/https://en.wikipedia.org/wiki/List_of_Google_domains # Scraper script: https://gist.github.com/zmwangx/b976e83c14552fe18b71 TLD_TO_DOMAIN_MAP = { 'ac': 'google.ac', 'ad': 'google.ad', 'ae': 'google.ae', 'af': 'google.com.af', 'ag': 'google.com.ag', 'ai': 'google.com.ai', 'al': 'google.al', 'am': 'google.am', 'ao': 'google.co.ao', 'ar': 'google.com.ar', 'as': 'google.as', 'at': 'google.at', 'au': 'google.com.au', 'az': 'google.az', 'ba': 'google.ba', 'bd': 'google.com.bd', 'be': 'google.be', 'bf': 'google.bf', 'bg': 'google.bg', 'bh': 'google.com.bh', 'bi': 'google.bi', 'bj': 'google.bj', 'bn': 'google.com.bn', 'bo': 'google.com.bo', 'br': 'google.com.br', 'bs': 'google.bs', 'bt': 'google.bt', 'bw': 'google.co.bw', 'by': 'google.by', 'bz': 'google.com.bz', 'ca': 'google.ca', 'cat': 'google.cat', 'cc': 'google.cc', 'cd': 'google.cd', 'cf': 'google.cf', 'cg': 'google.cg', 'ch': 'google.ch', 'ci': 'google.ci', 'ck': 'google.co.ck', 'cl': 'google.cl', 'cm': 'google.cm', 'cn': 'google.cn', 'co': 'google.com.co', 'cr': 'google.co.cr', 'cu': 'google.com.cu', 'cv': 'google.cv', 'cy': 'google.com.cy', 'cz': 'google.cz', 'de': 'google.de', 'dj': 'google.dj', 'dk': 'google.dk', 'dm': 'google.dm', 'do': 'google.com.do', 'dz': 'google.dz', 'ec': 'google.com.ec', 'ee': 'google.ee', 'eg': 'google.com.eg', 'es': 'google.es', 'et': 'google.com.et', 'fi': 'google.fi', 'fj': 'google.com.fj', 'fm': 'google.fm', 'fr': 'google.fr', 'ga': 'google.ga', 'ge': 'google.ge', 'gf': 'google.gf', 'gg': 'google.gg', 'gh': 'google.com.gh', 'gi': 'google.com.gi', 'gl': 'google.gl', 'gm': 'google.gm', 'gp': 'google.gp', 'gr': 'google.gr', 'gt': 'google.com.gt', 'gy': 'google.gy', 'hk': 'google.com.hk', 'hn': 'google.hn', 'hr': 'google.hr', 'ht': 'google.ht', 
'hu': 'google.hu', 'id': 'google.co.id', 'ie': 'google.ie', 'il': 'google.co.il', 'im': 'google.im', 'in': 'google.co.in', 'io': 'google.io', 'iq': 'google.iq', 'is': 'google.is', 'it': 'google.it', 'je': 'google.je', 'jm': 'google.com.jm', 'jo': 'google.jo', 'jp': 'google.co.jp', 'ke': 'google.co.ke', 'kg': 'google.kg', 'kh': 'google.com.kh', 'ki': 'google.ki', 'kr': 'google.co.kr', 'kw': 'google.com.kw', 'kz': 'google.kz', 'la': 'google.la', 'lb': 'google.com.lb', 'lc': 'google.com.lc', 'li': 'google.li', 'lk': 'google.lk', 'ls': 'google.co.ls', 'lt': 'google.lt', 'lu': 'google.lu', 'lv': 'google.lv', 'ly': 'google.com.ly', 'ma': 'google.co.ma', 'md': 'google.md', 'me': 'google.me', 'mg': 'google.mg', 'mk': 'google.mk', 'ml': 'google.ml', 'mm': 'google.com.mm', 'mn': 'google.mn', 'ms': 'google.ms', 'mt': 'google.com.mt', 'mu': 'google.mu', 'mv': 'google.mv', 'mw': 'google.mw', 'mx': 'google.com.mx', 'my': 'google.com.my', 'mz': 'google.co.mz', 'na': 'google.com.na', 'ne': 'google.ne', 'nf': 'google.com.nf', 'ng': 'google.com.ng', 'ni': 'google.com.ni', 'nl': 'google.nl', 'no': 'google.no', 'np': 'google.com.np', 'nr': 'google.nr', 'nu': 'google.nu', 'nz': 'google.co.nz', 'om': 'google.com.om', 'pa': 'google.com.pa', 'pe': 'google.com.pe', 'pg': 'google.com.pg', 'ph': 'google.com.ph', 'pk': 'google.com.pk', 'pl': 'google.pl', 'pn': 'google.co.pn', 'pr': 'google.com.pr', 'ps': 'google.ps', 'pt': 'google.pt', 'py': 'google.com.py', 'qa': 'google.com.qa', 'ro': 'google.ro', 'rs': 'google.rs', 'ru': 'google.ru', 'rw': 'google.rw', 'sa': 'google.com.sa', 'sb': 'google.com.sb', 'sc': 'google.sc', 'se': 'google.se', 'sg': 'google.com.sg', 'sh': 'google.sh', 'si': 'google.si', 'sk': 'google.sk', 'sl': 'google.com.sl', 'sm': 'google.sm', 'sn': 'google.sn', 'so': 'google.so', 'sr': 'google.sr', 'st': 'google.st', 'sv': 'google.com.sv', 'td': 'google.td', 'tg': 'google.tg', 'th': 'google.co.th', 'tj': 'google.com.tj', 'tk': 'google.tk', 'tl': 'google.tl', 'tm': 'google.tm', 
'tn': 'google.tn', 'to': 'google.to', 'tr': 'google.com.tr', 'tt': 'google.tt', 'tw': 'google.com.tw', 'tz': 'google.co.tz', 'ua': 'google.com.ua', 'ug': 'google.co.ug', 'uk': 'google.co.uk', 'uy': 'google.com.uy', 'uz': 'google.co.uz', 'vc': 'google.com.vc', 've': 'google.co.ve', 'vg': 'google.vg', 'vi': 'google.co.vi', 'vn': 'google.com.vn', 'vu': 'google.vu', 'ws': 'google.ws', 'za': 'google.co.za', 'zm': 'google.co.zm', 'zw': 'google.co.zw', } @property def netloc(self): """The hostname.""" try: return 'www.' + self.TLD_TO_DOMAIN_MAP[self._tld] except KeyError: return 'www.google.com' @property def query(self): """The query string.""" qd = {} qd.update(self._query_dict) if self._num != 10: # Skip sending the default qd['num'] = self._num if self._start: # Skip sending the default qd['start'] = self._start # Construct the q query q = '' keywords = self._keywords sites = self._sites if keywords: if isinstance(keywords, list): q += '+'.join(urllib.parse.quote_plus(kw) for kw in keywords) else: q += urllib.parse.quote_plus(keywords) if sites: q += '+OR'.join('+site:' + urllib.parse.quote_plus(site) for site in sites) qd['q'] = q return '&'.join('%s=%s' % (k, qd[k]) for k in sorted(qd.keys())) class GoogleConnectionError(Exception): pass class GoogleConnection(object): """ This class facilitates connecting to and fetching from Google. Parameters ---------- See http.client.HTTPSConnection for documentation of the parameters. Raises ------ GoogleConnectionError Attributes ---------- host : str The currently connected host. Read-only property. Use `new_connection` to change host. 
def new_connection(self, host=None, port=None, timeout=45):
    """Close the current connection (if any) and establish a new one.

    Parameters
    ----------
    See http.client.HTTPSConnection for documentation of the
    parameters. Renew the connection (i.e., reuse the current host
    and port) if host is None or empty.

    Raises
    ------
    GoogleConnectionError
        When connecting to the target host (or to the proxy server,
        if one is configured) fails for any reason.

    """
    if self._conn:
        self._conn.close()

    if not host:
        # Renewal: fall back to the host/port of the previous connection.
        host = self._host
        port = self._port
    self._host = host
    self._port = port
    host_display = host + (':%d' % port if port else '')

    proxy = self._proxy

    if proxy:
        proxy_user_passwd, proxy_host_port = parse_proxy_spec(proxy)

        logger.debug('Connecting to proxy server %s', proxy_host_port)
        self._conn = TLS1_2Connection(proxy_host_port, timeout=timeout)

        logger.debug('Tunnelling to host %s', host_display)
        connect_headers = {}
        if proxy_user_passwd:
            connect_headers['Proxy-Authorization'] = 'Basic %s' % base64.b64encode(
                proxy_user_passwd.encode('utf-8')
            ).decode('utf-8')
        self._conn.set_tunnel(host, port=port, headers=connect_headers)

        try:
            self._conn.connect(self._notweak)
        except Exception as e:
            # Report only the host:port part of the proxy spec: the raw
            # spec may embed "user:password" and must not end up in an
            # error message shown to the user or written to a log.
            msg = 'Failed to connect to proxy server %s: %s.' % (proxy_host_port, e)
            raise GoogleConnectionError(msg) from e
    else:
        logger.debug('Connecting to new host %s', host_display)
        self._conn = TLS1_2Connection(host, port=port, timeout=timeout)
        try:
            self._conn.connect(self._notweak)
        except Exception as e:
            msg = 'Failed to connect to %s: %s.' % (host_display, e)
            raise GoogleConnectionError(msg) from e
def fetch_page(self, url):
    """Fetch a URL.

    Allows one reconnection and up to three redirections before
    failing and raising GoogleConnectionError.

    Parameters
    ----------
    url : str
        The URL to fetch, relative to the host.

    Raises
    ------
    GoogleConnectionError
        When the request still fails after the single allowed
        reconnection, when the final response is not HTTP 200 (after
        following at most three redirections), or when Google is
        blocking the query due to unusual activity.

    Returns
    -------
    str
        Response payload, gunzipped (if applicable) and decoded (in UTF-8).

    """
    try:
        self._raw_get(url)
    except (http.client.HTTPException, OSError) as e:
        logger.debug('Got exception: %s.', e)
        logger.debug('Attempting to reconnect...')
        self.renew_connection()
        try:
            self._raw_get(url)
        except (http.client.HTTPException, OSError) as retry_error:
            # The retry can fail with a socket-level OSError just like the
            # first attempt; convert it too, so callers only ever have to
            # handle GoogleConnectionError.
            logger.debug('Got exception: %s.', retry_error)
            raise GoogleConnectionError("Failed to get '%s'." % url) from retry_error

    resp = self._resp
    redirect_counter = 0
    while resp.status != 200 and redirect_counter < 3:
        if resp.status in {301, 302, 303, 307, 308}:
            redirection_url = resp.getheader('location', '')
            # Google redirects to a "sorry" page when it throttles a client.
            if 'sorry/IndexRedirect?' in redirection_url or 'sorry/index?' in redirection_url:
                raise GoogleConnectionError('Connection blocked due to unusual activity.')
            self._redirect(redirection_url)
            resp = self._resp
            redirect_counter += 1
        else:
            break

    if resp.status != 200:
        raise GoogleConnectionError('Got HTTP %d: %s' % (resp.status, resp.reason))

    payload = resp.read()
    try:
        return gzip.decompress(payload).decode('utf-8')
    except OSError:
        # Not gzipped
        return payload.decode('utf-8')
def _raw_get(self, url):
    """Make a raw HTTP GET request.

    No status check (which implies no redirection). Response can be
    accessed from ``self._resp``. On the first response that carries a
    ``Set-Cookie`` header, its leading ``name=value`` pair is stored in
    ``self.cookie`` and sent back on subsequent requests.

    Parameters
    ----------
    url : str
        URL relative to the host, used in the GET request.

    Raises
    ------
    http.client.HTTPException

    """
    logger.debug('Fetching URL %s', url)
    self._conn.request('GET', url, None, {
        'Accept-Encoding': 'gzip',
        # `ua` and USER_AGENT are module-level settings defined elsewhere
        # in this file; an empty User-Agent is sent when `ua` is falsy.
        'User-Agent': USER_AGENT if ua else '',
        'Cookie': self.cookie,
        'Connection': 'keep-alive',
        'DNT': '1',
    })
    self._resp = self._conn.getresponse()
    if self.cookie == '':
        complete_cookie = self._resp.getheader('Set-Cookie')
        # Cookie won't be available if already blocked
        if complete_cookie is not None:
            # Keep everything before the first ';'. Splitting (rather than
            # slicing at str.find's result) keeps the whole value intact
            # when the header has no attributes and thus no ';' at all.
            self.cookie = complete_cookie.split(';', 1)[0]
            logger.debug('Cookie: %s', self.cookie)
def retrieve_tag_annotation(annotated_endtag_handler):
    # See parser logic within the GoogleParser class for documentation.
    #
    # Decorator for contextual endtag handlers: pops the annotation that
    # was pushed for the matching start tag and passes it to the wrapped
    # handler as an extra argument.
    #
    # annotated_endtag_handler(self, tag: str, annotation) -> None
    # Returns: HTMLParser.handle_endtag(self, tag: str) -> None

    def handler(self, tag):
        try:
            annotation = self.tag_annotations[tag].pop()
        except (IndexError, KeyError):
            # Malformed HTML -- more close tags than open tags
            # (IndexError: the stack for this tag name is empty), or a
            # close tag whose name was never opened at all (KeyError: no
            # stack was ever created for it).
            annotation = None
        annotated_endtag_handler(self, tag, annotation)

    return handler
""" # Parser logic: # # - Guiding principles: # # 1. Tag handlers are contextual; # # 2. Contextual starttag and endtag handlers should come in pairs # and have a clear hierarchy; # # 3. starttag handlers should only yield control to a pair of # child handlers (that is, one level down the hierarchy), and # correspondingly, endtag handlers should only return control # to the parent (that is, the pair of handlers that gave it # control in the first place). # # Principle 3 is meant to enforce a (possibly implicit) stack # structure and thus prevent careless jumps that result in what's # essentially spaghetti code with liberal use of GOTOs. # # - HTMLParser.handle_endtag gives us a bare tag name without # context, which is not good for enforcing principle 3 when we # have, say, nested div tags. # # In order to precisely identify the matching opening tag, we # maintain a stack for each tag name with *annotations*. Important # opening tags (e.g., the ones where child handlers are # registered) can be annotated so that when we can watch for the # annotation in the endtag handler, and when the appropriate # annotation is popped, we perform the corresponding action (e.g., # switch back to old handlers). # # To facilitate this, each starttag handler is decorated with # @annotate_tag, which accepts a return value that is the # annotation (None by default), and additionally converts attrs to # a dict, which is much easier to work with; and each endtag # handler is decorated with @retrieve_tag_annotation which sends # an additional parameter that is the retrieved annotation to the # handler. # # Note that some of our tag annotation stacks leak over time: this # happens to tags like and
which are not # closed. However, these tags play no structural role, and come # only in small quantities, so it's not really a problem. # # - All textual data (result title, result abstract, etc.) are # processed through a set of shared handlers. These handlers store # text in a shared buffer self.textbuf which can be retrieved and # cleared at appropriate times. # # Data (including charrefs and entityrefs) are ignored initially, # and when data needs to be recorded, the start_populating_textbuf # method is called to register the appropriate data, charref and # entityref handlers so that they append to self.textbuf. When # recording ends, pop_textbuf should be called to extract the text # and clear the buffer. stop_populating_textbuf returns the # handlers to their pristine state (ignoring data). # # Methods: # - start_populating_textbuf(self, data_transformer: Callable[[str], str]) -> None # - pop_textbuf(self) -> str # - stop_populating_textbuf(self) -> None # # - Outermost starttag and endtag handler methods: root_*. The whole # parser starts and ends in this state. # # - Each result is wrapped in a
tag with class "g". # # #
#
# # - For each result, the first

tag with class "r" contains the # hyperlinked title, and the (optional) first
tag with class # "s" contains the abstract of the result. # # #

#

#
#
# # - Each title looks like # #

# # # file type (e.g. [PDF]) # # # result title # #

# # - For each abstract, the first tag with class "st" contains # the body text of the abstract. # # # # abstract text with markup on keywords # # # - Certain results may come with sitelinks, secondary results that # are usually subdomains or deep links within the primary # result. They are organized into a tag, and each sitelink # is in a separate
: # # # # # # # ... # # # ... #
#
# # Then for each sitelink, the hyperlinked title is in an

tag # with class "r", and the abstract is in a
tag with class # "st". They are not necessarily on the same level, but we don't # really care. # # #

# # sitelink title # #

# # #
# abstract text #
# # - Sometimes Google autocorrects a query. Whenever this happens # there will be a block whose English version reads "Showing # results for ... Search instead for ...", and the HTML # looks like # # Showing results for # google #
# # # We collect the text inside a.spell as the suggested spelling # (self.suggested_spelling). # # Note that: # # 1. When npfr=1 (exact), there could still be an # a.spell, in a block that reads (English version) "Did you mean: # ...". Therefore, we only consider the query autocorrected when a # meaningful .spell_orig is also present (self.autocorrected). # # 2. A few garbage display:none, empty tags related to spell # appear to be always present: span#srfm.spell, a#srfl.spell, # span#sifm.spell_orig, a#sifl.spell_orig. We need to exclude # the ids srfm, srfl, sifm and sifl from our consideration. # # - Sometimes Google omits similar (more like duplicate) result # entries. Whenever this happens there will be a notice in p#ofr. The way # to unfilter is to simply add '&filter=0' to the query string. # # # Google News # # - Google News results differ from Google Search results in the # following ways: # # For each result, the title in the same format, but there's a # metadata field in a
tag with class "slp", and the abstract # isn't as deeply embedded: it's in a
tag on the same level # with class "st". # # #

#
# ... # source # - # publishing time #
#
# abstract text again with markup on keywords #
# # # Ignore List # # - As good as our result criteria might be, sometimes results of # dubious value (usually from Google's value-add features) slip # through. The "People also ask" feature is a good example of this # type (a sample query is "VPN"; see screenshot # https://i.imgur.com/yfcsoQz.png). In these cases, we may want to # skip enclosing containers entirely. The ignore list feature is # designed for this purpose. # # The current ignore list is available in self.IGNORE_LIST. Each # entry (called a "selector") is a dict of attribute-value # pairs. Each attribute is matched verbatim to a tag's attribute, # except the "class" attribute, where we test for inclusion # instead (e.g. "c b a" matches "a b", just like it matches the # CSS selector ".a.b"). There's also a special "attribute" -- tag, # the meaning of which is obvious. A tag has to match all given # attributes to be considered a match for the selector. # # When a match is found, the tag is annotated as SCOPE_ignored, # where SCOPE is the current handler scope (e.g., root, result, # title, etc.), and the scope is switched to 'ignore'. All # descendants of the tag are ignored. When the corresponding end # tag is finally reached, the former scope is restored. # # # User Agent disabled (differences) # # 1. For Google News results,
is followed by tag #
#
# # 2. File mime type follows
# e.g. search - '3 hours youtube' # # # 10 Jun 2014 - 179 min - # Uploaded by Meditation Relax Music # #
3 HOURS Best Relaxing Music 'Romantic Piano" Background Music for Stress ... 3:03 ... #
# # 8. There's no a.spell_orig when the query is autocorrected; the # tag (linking to the exact search) is wrapped in the # span.spell_orig. def __init__(self, news=False): html.parser.HTMLParser.__init__(self) self.news = news self.autocorrected = False self.suggested_spelling = None self.filtered = False self.results = [] self.index = 0 self.textbuf = '' self.tag_annotations = {} self.set_handlers_to('root') # Ignore list IGNORE_LIST = [ # "People also ask" # Sample query: VPN # Screenshot: https://i.imgur.com/yfcsoQz.png { 'tag': 'div', 'class': 'related-question-pair' }, # We omit Google's "smart card" results (term coined by me) by # guarding against the 'g-blk' class (sample response: https://git.io/voJgB) { 'tag': 'div', 'class': 'g-blk' }, # We also guard against "smart-card" results with `--noua` option { 'tag': 'div', 'class': 'hp-xpdbox' } ] # Tag handlers @annotate_tag def root_start(self, tag, attrs): if tag == 'div' and 'g' in self.classes(attrs): # Initialize result field registers self.title = '' self.url = '' self.abstract = '' self.metadata = '' # Only used for Google News self.sitelinks = [] # Guard against sitelinks, which also have titles and # abstracts. In the case of news, guard against "card # sections" (secondary results to the same event). 
self.title_registered = False self.abstract_registered = False self.metadata_registered = False # Only used for Google News self.set_handlers_to('result') return 'result' # Autocorrect if tag == 'span' and 'spell_orig' in self.classes(attrs) and attrs.get('id') != 'sifm': self.autocorrected = True return if tag == 'a' and 'spell' in self.classes(attrs) and attrs.get('id') != 'srfl': self.start_populating_textbuf() return 'spell' # Omitted results if tag == 'p' and attrs.get('id') == 'ofr': self.filtered = True @retrieve_tag_annotation def root_end(self, tag, annotation): if annotation == 'spell': self.stop_populating_textbuf() self.suggested_spelling = self.pop_textbuf() @annotate_tag def result_start(self, tag, attrs): if not ua and tag == 'span' and 'mime' in self.classes(attrs): self.start_populating_textbuf() return 'title_filetype' if not self.title_registered and tag == 'h3' and 'r' in self.classes(attrs): self.set_handlers_to('title') return 'title' if not self.abstract_registered and tag == 'div' and 's' in self.classes(attrs): self.set_handlers_to('abstract') return 'abstract' if not ua and not self.abstract_registered \ and tag == 'span' and 'st' in self.classes(attrs): self.start_populating_textbuf(lambda text: text + ' ') return 'abstract_gservices' if not self.sitelinks and tag == 'table': if ua or (not self.news and 'ts' not in self.classes(attrs)): self.set_handlers_to('sitelink_table') return 'sitelink_table' if self.news: if not self.metadata_registered and tag == 'div' and 'slp' in self.classes(attrs): # Change metadata field separator from '-' to ', ' for better appearance if ua: self.start_populating_textbuf(lambda text: ', ' if text == '-' else text) else: self.start_populating_textbuf(lambda text: text.replace(' -', ',', 1) if ' - ' in text else text) return 'news_metadata' if not self.abstract_registered and tag == 'div' and 'st' in self.classes(attrs): self.start_populating_textbuf() return 'news_abstract' @retrieve_tag_annotation def 
result_end(self, tag, annotation): if annotation == 'result': if self.url: self.index += 1 result = Result(self.index, self.title, self.url, self.abstract, metadata=self.metadata if self.metadata else None, sitelinks=self.sitelinks) self.results.append(result) self.set_handlers_to('root') elif annotation == 'news_metadata': self.stop_populating_textbuf() self.metadata = self.pop_textbuf() self.metadata_registered = True elif annotation == 'news_abstract': self.stop_populating_textbuf() self.abstract = self.pop_textbuf() self.abstract_registered = True elif annotation == 'abstract_gservices': self.stop_populating_textbuf() self.abstract = self.pop_textbuf().replace(' ', ' ') self.abstract_registered = True @annotate_tag def title_start(self, tag, attrs): if ua and tag == 'span': # Print a space after the filetype indicator self.start_populating_textbuf(lambda text: text + ' ') return 'title_filetype' if tag == 'a' and 'href' in attrs: # Skip 'News for', 'Images for' search links if attrs['href'].startswith('/search'): return # Skip card results if not ua and "fl" in self.classes(attrs): return self.url = attrs['href'] try: start = self.url.index('?q=') + len('?q=') end = self.url.index('&sa=', start) self.url = urllib.parse.unquote_plus(self.url[start:end]) except ValueError: pass self.start_populating_textbuf() return 'title_link' @retrieve_tag_annotation def title_end(self, tag, annotation): if annotation == 'title_filetype': self.stop_populating_textbuf() elif annotation == 'title_link': self.stop_populating_textbuf() self.title = self.pop_textbuf() self.title_registered = True elif annotation == 'title': self.set_handlers_to('result') @annotate_tag def abstract_start(self, tag, attrs): if (not self.metadata_registered and tag == 'div' and 'slp' in self.classes(attrs)): self.start_populating_textbuf() return 'result_metadata' if tag == 'span' and 'st' in self.classes(attrs): self.start_populating_textbuf() return 'abstract_text' @retrieve_tag_annotation def 
abstract_end(self, tag, annotation): if annotation == 'result_metadata': self.stop_populating_textbuf() self.metadata = self.pop_textbuf().strip().replace('\u200e', '') self.metadata_registered = True elif annotation == 'abstract_text': self.stop_populating_textbuf() self.abstract = self.pop_textbuf() self.abstract_registered = True elif annotation == 'abstract': self.set_handlers_to('result') @annotate_tag def sitelink_table_start(self, tag, attrs): if tag == 'td': # Initialize a new sitelink self.current_sitelink = Sitelink('', '', '') self.set_handlers_to('sitelink') return 'sitelink' @retrieve_tag_annotation def sitelink_table_end(self, tag, annotation): if annotation == 'sitelink_table': self.set_handlers_to('result') @annotate_tag def sitelink_start(self, tag, attrs): if tag == 'h3' and 'r' in self.classes(attrs): self.set_handlers_to('sitelink_title') return 'sitelink_title' if tag == 'div' and 'st' in self.classes(attrs): self.start_populating_textbuf() return 'sitelink_abstract' @retrieve_tag_annotation def sitelink_end(self, tag, annotation): if annotation == 'sitelink_abstract': self.stop_populating_textbuf() self.current_sitelink.abstract = self.pop_textbuf() elif annotation == 'sitelink': if self.current_sitelink.url: self.sitelinks.append(self.current_sitelink) self.set_handlers_to('sitelink_table') @annotate_tag def sitelink_title_start(self, tag, attrs): if tag == 'a' and 'href' in attrs: self.current_sitelink.url = attrs['href'] try: start = self.current_sitelink.url.index('?q=') + len('?q=') end = self.current_sitelink.url.index('&sa=', start) self.current_sitelink.url = urllib.parse.unquote_plus(self.current_sitelink.url[start:end]) except ValueError: pass self.start_populating_textbuf() return 'sitelink_title_link' @retrieve_tag_annotation def sitelink_title_end(self, tag, annotation): if annotation == 'sitelink_title_link': self.stop_populating_textbuf() self.current_sitelink.title = self.pop_textbuf() elif annotation == 'sitelink_title': 
self.set_handlers_to('sitelink') # Generic methods # Set handle_starttag to SCOPE_start, and handle_endtag to SCOPE_end. def set_handlers_to(self, scope): self.handle_starttag = getattr(self, scope + '_start') self.handle_endtag = getattr(self, scope + '_end') def insert_annotation(self, tag, annotation): if tag not in self.tag_annotations: self.tag_annotations[tag] = [] self.tag_annotations[tag].append(annotation) @annotate_tag def ignore_start(self, tag, attrs): pass @retrieve_tag_annotation def ignore_end(self, tag, annotation): if annotation and annotation.endswith('_ignored'): # Strip '-ignore' suffix from annotation to obtain the outer # context name. context = annotation[:-8] self.set_handlers_to(context) def start_populating_textbuf(self, data_transformer=None): if data_transformer is None: # Record data verbatim self.handle_data = self.record_data else: def record_transformed_data(data): self.textbuf += data_transformer(data) self.handle_data = record_transformed_data self.handle_entityref = self.record_entityref self.handle_charref = self.record_charref def pop_textbuf(self): text = self.textbuf self.textbuf = '' return text def stop_populating_textbuf(self): self.handle_data = lambda data: None self.handle_entityref = lambda ref: None self.handle_charref = lambda ref: None def record_data(self, data): self.textbuf += data def record_entityref(self, ref): try: self.textbuf += chr(html.entities.name2codepoint[ref]) except KeyError: # Entity name not found; most likely rather sloppy HTML # where a literal ampersand is not escaped; For instance, # the HTML response returned by # # googler -c au -l ko expected # # contains the following tag # #

expected market return s&p 500

def record_charref(self, ref):
    """Append the character for a numeric character reference to ``self.textbuf``.

    Parameters
    ----------
    ref : str
        The reference without the leading ``&#`` and trailing ``;``:
        either decimal digits, or hexadecimal digits prefixed with
        ``x`` or ``X``.

    If ``ref`` does not denote a valid code point, the literal text
    ``&#`` + ref is recorded instead, mirroring how record_entityref
    keeps unknown entity names verbatim.
    """
    try:
        if ref[:1] in ('x', 'X'):
            # Hexadecimal form; the prefix may be written in either case.
            char = chr(int(ref[1:], 16))
        else:
            char = chr(int(ref))
    except (ValueError, OverflowError):
        # Unparsable number or code point outside the Unicode range.
        char = '&#' + ref
    self.textbuf += char
colors: print(colors.index + index + colors.reset, end='') if self.urlexpand: print(' ' + colors.title + title + colors.reset) print(colors.url + url + colors.reset) else: print(' ' + colors.title + title + colors.reset + colors.url + url + colors.reset) else: if self.urlexpand: print(' %s %s\n%s' % (index, title, url)) else: print(' %s %s%s' % (index, title, url)) def _print_metadata_and_abstract(self, abstract, metadata=None, indent=0): colors = self.colors try: columns, _ = os.get_terminal_size() except OSError: columns = 0 if metadata: if colors: print(colors.metadata + metadata + colors.reset) else: print(metadata) if colors: print(colors.abstract, end='') if columns > indent + 1: # Try to fill to columns fillwidth = columns - indent - 1 for line in textwrap.wrap(abstract.replace('\n', ''), width=fillwidth): print('%s%s' % (' ' * indent, line)) print('') else: print('%s\n' % abstract.replace('\n', ' ')) if colors: print(colors.reset, end='') def print(self): """Print the result entry.""" self._print_title_and_url(self.index, self.title, self.url) self._print_metadata_and_abstract(self.abstract, metadata=self.metadata) for sitelink in self.sitelinks: self._print_title_and_url(sitelink.index, sitelink.title, sitelink.url, indent=4) self._print_metadata_and_abstract(sitelink.abstract, indent=4) def jsonizable_object(self): """Return a JSON-serializable dict representing the result entry.""" obj = { 'title': self.title, 'url': self.url, 'abstract': self.abstract } if self.metadata: obj['metadata'] = self.metadata if self.sitelinks: obj['sitelinks'] = [sitelink.__dict__ for sitelink in self.sitelinks] return obj def urltable(self): """Return a index-to-URL table for the current result. Normally, the table contains only a single entry, but when the result contains sitelinks, all sitelinks are included in this table. Returns ------- dict A dict mapping indices (strs) to URLs (also strs). 
class GooglerCmdException(Exception):
    """Base exception type; NoKeywordsException derives from it."""


class NoKeywordsException(GooglerCmdException):
    """Raised by methods wrapped in require_keywords when no keywords are set."""


def require_keywords(method):
    # Require keywords to be set before we run a GooglerCmd method. If
    # no keywords have been set, raise a NoKeywordsException.
    #
    # The wrapped method's return value is passed through unchanged.
    @functools.wraps(method)
    def enforced_method(self, *args, **kwargs):
        if not self.keywords:
            raise NoKeywordsException('No keywords.')
        return method(self, *args, **kwargs)

    return enforced_method


def no_argument(method):
    # Normalize a do_* method of GooglerCmd that takes no argument to
    # one that takes an arg, but issue a warning when an nonempty
    # argument is given.
    #
    # The wrapped method's return value is passed through unchanged.
    @functools.wraps(method)
    def enforced_method(self, arg):
        if arg:
            # The command name comes from the wrapped method (do_next ->
            # 'next'), not from `arg`, which is the user-supplied string.
            method_name = method.__name__
            command_name = method_name[3:] if method_name.startswith('do_') else method_name
            logger.warning("Argument to the '%s' command ignored.", command_name)
        return method(self)

    return enforced_method
Read-only attribute Methods ------- fetch() display_results(prelude='\n', json_output=False) fetch_and_display(prelude='\n', json_output=False, interactive=True) read_next_command() help() cmdloop() """ # Class variables colors = None def __init__(self, opts): super().__init__() self._opts = opts self._google_url = GoogleUrl(opts) proxy = opts.proxy if hasattr(opts, 'proxy') else None self._conn = GoogleConnection(self._google_url.hostname, proxy=proxy, notweak=opts.notweak) atexit.register(self._conn.close) self.results = [] self._autocorrected_to = None self._results_filtered = False self._urltable = {} self.promptcolor = True if os.getenv('DISABLE_PROMPT_COLOR') is None else False @property def options(self): """Current options.""" return self._opts @property def keywords(self): """Current keywords.""" return self._google_url.keywords @require_keywords def fetch(self): """Fetch a page and parse for results. Results are stored in ``self.results``. Raises ------ GoogleConnectionError See Also -------- fetch_and_display """ # This method also sets self._results_filtered and # self._urltable. page = self._conn.fetch_page(self._google_url.relative()) if logger.isEnabledFor(logging.DEBUG): import tempfile fd, tmpfile = tempfile.mkstemp(prefix='googler-response-') os.close(fd) with open(tmpfile, 'w', encoding='utf-8') as fp: fp.write(page) logger.debug("Response body written to '%s'.", tmpfile) parser = GoogleParser(news=self._google_url.news) parser.feed(page) self.results = parser.results self._autocorrected_to = parser.suggested_spelling if parser.autocorrected else None self._results_filtered = parser.filtered self._urltable = {} for r in self.results: self._urltable.update(r.urltable()) @require_keywords def display_results(self, prelude='\n', json_output=False): """Display results stored in ``self.results``. Parameters ---------- See `fetch_and_display`. 
""" if json_output: # JSON output import json results_object = [r.jsonizable_object() for r in self.results] print(json.dumps(results_object, indent=2, sort_keys=True, ensure_ascii=False)) else: # Regular output if not self.results: print('No results.', file=sys.stderr) else: sys.stderr.write(prelude) for r in self.results: r.print() @require_keywords def fetch_and_display(self, prelude='\n', json_output=False, interactive=True): """Fetch a page and display results. Results are stored in ``self.results``. Parameters ---------- prelude : str, optional A string that is written to stderr before showing actual results, usually serving as a separator. Default is an empty line. json_output : bool, optional Whether to dump results in JSON format. Default is False. interactive : bool, optional Whether to show contextual instructions, when e.g. Google has filtered the results. Default is True. Raises ------ GoogleConnectionError See Also -------- fetch display_results """ self.fetch() colors = self.colors if self._autocorrected_to: if colors: # Underline the keywords autocorrected_to = '\x1b[4m' + self._autocorrected_to + '\x1b[24m' else: autocorrected_to = self._autocorrected_to autocorrect_info = ('Showing results for %s; enter "x" for an exact search.' % autocorrected_to) printerr('') if colors: printerr(colors.prompt + autocorrect_info + colors.reset) else: printerr('** ' + autocorrect_info) self.display_results(prelude=prelude, json_output=json_output) if self._results_filtered: unfilter_info = 'Enter "unfilter" to show similar results Google omitted.' if colors: printerr(colors.prompt + unfilter_info + colors.reset) else: printerr('** ' + unfilter_info) printerr('') def read_next_command(self): """Show omniprompt and read user command line. Command line is always stripped, and each consecutive group of whitespace is replaced with a single space character. If the command line is empty after stripping, when ignore it and keep reading. 
Exit with status 0 if we get EOF or an empty line (pre-strip, that is, a raw ) twice in a row. The new command line (non-empty) is stored in ``self.cmd``. """ colors = self.colors message = 'googler (? for help)' prompt = (colors.prompt + message + colors.reset + ' ') if (colors and self.promptcolor) else (message + ': ') enter_count = 0 while True: try: cmd = input(prompt) except EOFError: sys.exit(0) if not cmd: enter_count += 1 if enter_count == 2: # Double sys.exit(0) else: enter_count = 0 cmd = ' '.join(cmd.split()) if cmd: self.cmd = cmd break @staticmethod def help(): GooglerArgumentParser.print_omniprompt_help(sys.stderr) printerr('') @require_keywords @no_argument def do_first(self): try: self._google_url.first_page() except ValueError as e: print(e, file=sys.stderr) return self.fetch_and_display() def do_google(self, arg): # Update keywords and reconstruct URL self._opts.keywords = arg self._google_url = GoogleUrl(self._opts) self.fetch_and_display() @require_keywords @no_argument def do_next(self): # If > 5 results are being fetched each time, # block next when no parsed results in current fetch if not self.results and self._google_url._num > 5: printerr('No results.') else: self._google_url.next_page() self.fetch_and_display() @require_keywords def do_open(self, *args): if not args: open_url(self._google_url.full()) return for nav in args: if nav == 'a': for key, value in sorted(self._urltable.items()): open_url(self._urltable[key]) elif nav in self._urltable: open_url(self._urltable[nav]) elif '-' in nav: try: vals = [int(x) for x in nav.split('-')] if (len(vals) != 2): printerr('Invalid range %s.' % nav) continue if vals[0] > vals[1]: vals[0], vals[1] = vals[1], vals[0] for _id in range(vals[0], vals[1] + 1): if str(_id) in self._urltable: open_url(self._urltable[str(_id)]) else: printerr('Invalid index %s.' % _id) except ValueError: printerr('Invalid range %s.' % nav) else: printerr('Invalid index %s.' 
% nav) @require_keywords @no_argument def do_previous(self): try: self._google_url.prev_page() except ValueError as e: print(e, file=sys.stderr) return self.fetch_and_display() @require_keywords @no_argument def do_exact(self): # Reset start to 0 when exact is applied. self._google_url.update(start=0, exact=True) self.fetch_and_display() @require_keywords @no_argument def do_unfilter(self): # Reset start to 0 when unfilter is applied. self._google_url.update(start=0) self._google_url.set_queries(filter=0) self.fetch_and_display() def cmdloop(self): """Run REPL.""" if self.keywords: self.fetch_and_display() else: printerr('Please initiate a query.') while True: self.read_next_command() # TODO: Automatic dispatcher # # We can't write a dispatcher for now because that could # change behaviour of the prompt. However, we have already # laid a lot of ground work for the dispatcher, e.g., the # `no_argument' decorator. try: cmd = self.cmd if cmd == 'f': self.do_first('') elif cmd.startswith('g '): self.do_google(cmd[2:]) elif cmd == 'n': self.do_next('') elif cmd == 'o': self.do_open() elif cmd.startswith('o '): self.do_open(*cmd[2:].split()) elif cmd.startswith('O '): open_url.override_text_browser = True self.do_open(*cmd[2:].split()) open_url.override_text_browser = False elif cmd == 'p': self.do_previous('') elif cmd == 'q': break elif cmd == 'x': self.do_exact('') elif cmd == 'unfilter': self.do_unfilter('') elif cmd == '?': self.help() elif cmd in self._urltable: open_url(self._urltable[cmd]) elif self.keywords and cmd.isdigit() and int(cmd) < 100: printerr('Index out of bound. 
To search for the number, use g.') elif cmd == 'u': Result.urlexpand = not Result.urlexpand printerr('url expansion toggled.') elif cmd.startswith('c ') and cmd[2:].isdigit(): try: # try copying the url to clipboard using native utilities if sys.platform.startswith(('linux', 'freebsd', 'openbsd')): if shutil.which('xsel') is None: raise FileNotFoundError copier_params = ['xsel', '-b', '-i'] elif sys.platform == 'darwin': copier_params = ['pbcopy'] elif sys.platform == 'win32': copier_params = ['clip'] else: copier_params = [] if not copier_params: printerr('operating system not identified') else: Popen(copier_params, stdin=PIPE, stdout=DEVNULL, stderr=DEVNULL).communicate(self._urltable[cmd[2:]].encode('utf-8')) except FileNotFoundError: printerr('xsel missing') except Exception: raise NoKeywordsException else: self.do_google(cmd) except NoKeywordsException: printerr('Initiate a query first.') class GooglerArgumentParser(argparse.ArgumentParser): """Custom argument parser for googler.""" # Print omniprompt help @staticmethod def print_omniprompt_help(file=None): file = sys.stderr if file is None else file file.write(textwrap.dedent(""" omniprompt keys: n, p fetch the next or previous set of search results index open the result corresponding to index in browser f jump to the first page o [index|range|a ...] open space-separated result indices, numeric ranges (sitelinks unsupported in ranges), or all, in browser open the current search in browser, if no arguments O [index|range|a ...] like key 'o', but try to open in a GUI browser g keywords new Google search for 'keywords' with original options should be used to search omniprompt keys and indices c index copy url to clipboard u toggle url expansion q, ^D, double Enter exit googler ? 
show omniprompt help * other inputs issue a new search with original options """)) # Print information on googler @staticmethod def print_general_info(file=None): file = sys.stderr if file is None else file file.write(textwrap.dedent(""" Version %s Copyright © 2008 Henri Hakkinen Copyright © 2015-2018 Arun Prakash Jana Zhiming Wang License: GPLv3 Webpage: https://github.com/jarun/googler """ % _VERSION_)) # Augment print_help to print more than synopsis and options def print_help(self, file=None): super().print_help(file) self.print_omniprompt_help(file) self.print_general_info(file) # Automatically print full help text on error def error(self, message): sys.stderr.write('%s: error: %s\n\n' % (self.prog, message)) self.print_help(sys.stderr) self.exit(2) # Type guards @staticmethod def positive_int(arg): """Try to convert a string into a positive integer.""" try: n = int(arg) assert n > 0 return n except (ValueError, AssertionError): raise argparse.ArgumentTypeError('%s is not a positive integer' % arg) @staticmethod def nonnegative_int(arg): """Try to convert a string into a nonnegative integer.""" try: n = int(arg) assert n >= 0 return n except (ValueError, AssertionError): raise argparse.ArgumentTypeError('%s is not a non-negative integer' % arg) @staticmethod def is_duration(arg): """Check if a string is a valid duration accepted by Google. A valid duration is of the form dNUM, where d is a single letter h (hour), d (day), w (week), m (month), or y (year), and NUM is a non-negative integer. 
""" try: if arg[0] not in ('h', 'd', 'w', 'm', 'y') or int(arg[1:]) < 0: raise ValueError except (TypeError, IndexError, ValueError): raise argparse.ArgumentTypeError('%s is not a valid duration' % arg) return arg @staticmethod def is_colorstr(arg): """Check if a string is a valid color string.""" try: assert len(arg) == 6 for c in arg: assert c in COLORMAP except AssertionError: raise argparse.ArgumentTypeError('%s is not a valid color string' % arg) return arg # Self-upgrade mechanism def system_is_windows(): """Checks if the underlying system is Windows (Cygwin included).""" return sys.platform in {'win32', 'cygwin'} def download_latest_googler(include_git=False): """Download latest googler to a temp file. By default, the latest released version is downloaded, but if `include_git` is specified, then the latest git master is downloaded instead. Parameters ---------- include_git : bool, optional Download from git master. Default is False. Returns ------- (git_ref, path): tuple A tuple containing the git reference (either name of the latest tag or SHA of the latest commit) and path to the downloaded file. 
""" import urllib.request if include_git: # Get SHA of latest commit on master request = urllib.request.Request('%s/commits/master' % API_REPO_BASE, headers={'Accept': 'application/vnd.github.v3.sha'}) response = urllib.request.urlopen(request) if response.status != 200: raise http.client.HTTPException(response.reason) git_ref = response.read().decode('utf-8') else: # Get name of latest tag request = urllib.request.Request('%s/releases?per_page=1' % API_REPO_BASE, headers={'Accept': 'application/vnd.github.v3+json'}) response = urllib.request.urlopen(request) if response.status != 200: raise http.client.HTTPException(response.reason) import json git_ref = json.loads(response.read().decode('utf-8'))[0]['tag_name'] # Download googler to a tempfile googler_download_url = '%s/%s/googler' % (RAW_DOWNLOAD_REPO_BASE, git_ref) printerr('Downloading %s' % googler_download_url) request = urllib.request.Request(googler_download_url, headers={'Accept-Encoding': 'gzip'}) import tempfile fd, path = tempfile.mkstemp() atexit.register(lambda: os.remove(path) if os.path.exists(path) else None) os.close(fd) with open(path, 'wb') as fp: with urllib.request.urlopen(request) as response: if response.status != 200: raise http.client.HTTPException(response.reason) payload = response.read() try: fp.write(gzip.decompress(payload)) except OSError: fp.write(payload) return git_ref, path def self_replace(path): """Replace the current script with a specified file. Both paths (the specified path and path to the current script) are resolved to absolute, symlink-free paths. Upon replacement, the owner and mode signatures of the current script are preserved. The caller needs to have the necessary permissions. Replacement won't happen if the specified file is the same (content-wise) as the current script. Parameters ---------- path : str Path to the replacement file. Returns ------- bool True if replaced, False if skipped (specified file is the same as the current script). 
""" if system_is_windows(): raise NotImplementedError('Self upgrade not supported on Windows.') import filecmp import shutil path = os.path.realpath(path) self_path = os.path.realpath(__file__) if filecmp.cmp(path, self_path): return False self_stat = os.stat(self_path) os.chown(path, self_stat.st_uid, self_stat.st_gid) os.chmod(path, self_stat.st_mode) shutil.move(path, self_path) return True def self_upgrade(include_git=False): """Perform in-place self-upgrade. Parameters ---------- include_git : bool, optional See `download_latest_googler`. Default is False. """ git_ref, path = download_latest_googler(include_git=include_git) if self_replace(path): printerr('Upgraded to %s.' % git_ref) else: printerr('Already up to date.') # Miscellaneous functions def python_version(): return '%d.%d.%d' % sys.version_info[:3] def https_proxy_from_environment(): return os.getenv('https_proxy') def parse_proxy_spec(proxyspec): if '://' in proxyspec: pos = proxyspec.find('://') scheme = proxyspec[:pos] proxyspec = proxyspec[pos+3:] if scheme.lower() != 'http': # Only support HTTP proxies. # # In particular, we don't support HTTPS proxies since we # only speak plain HTTP to the proxy server, so don't give # users a false sense of security. raise NotImplementedError('Unsupported proxy scheme %s.' % scheme) if '@' in proxyspec: pos = proxyspec.find('@') user_passwd = urllib.parse.unquote(proxyspec[:pos]) host_port = proxyspec[pos+1:] else: user_passwd = None host_port = proxyspec if ':' not in host_port: # Use port 1080 as default, following curl. host_port += ':1080' return user_passwd, host_port # Query autocompleter # This function is largely experimental and could raise any exception; # you should be prepared to catch anything. When it works though, it # returns a list of strings the prefix could autocomplete to (however, # it is not guaranteed that they start with the specified prefix; for # instance, they won't if the specified prefix ends in a punctuation # mark.) 
def completer_fetch_completions(prefix): import json import re import urllib.request # One can pass the 'hl' query param to specify the language. We # ignore that for now. api_url = ('https://www.google.com/complete/search?client=psy-ab&q=%s' % urllib.parse.quote(prefix, safe='')) # A timeout of 3 seconds seems to be overly generous already. resp = urllib.request.urlopen(api_url, timeout=3) respobj = json.loads(resp.read().decode('utf-8')) # The response object, once parsed as JSON, should look like # # ['git', # [['github', 0], # ['git', 0], # ['gitlab', 0], # ['git stash', 0]], # {'q': 'oooAhRzoChqNmMbNaaDKXk1YY4k', 't': {'bpc': False, 'tlw': False}}] # # Note the each result entry need not have two members; e.g., for # 'gi', there is an entry ['gif', 0, [131]]. HTML_TAG = re.compile(r'<[^>]+>') return [HTML_TAG.sub('', entry[0]) for entry in respobj[1]] def completer_run(prefix): if prefix: completions = completer_fetch_completions(prefix) if completions: print('\n'.join(completions)) sys.exit(0) def parse_args(args=None, namespace=None): """Parse googler arguments/options. Parameters ---------- args : list, optional Arguments to parse. Default is ``sys.argv``. namespace : argparse.Namespace Namespace to write to. Default is a new namespace. Returns ------- argparse.Namespace Namespace with parsed arguments / options. 
""" colorstr_env = os.getenv('GOOGLER_COLORS') argparser = GooglerArgumentParser(description='Google from the command-line.') addarg = argparser.add_argument addarg('-s', '--start', type=argparser.nonnegative_int, default=0, metavar='N', help='start at the Nth result') addarg('-n', '--count', dest='num', type=argparser.positive_int, default=10, metavar='N', help='show N results (default 10)') addarg('-N', '--news', action='store_true', help='show results from news section') addarg('-c', '--tld', metavar='TLD', help="""country-specific search with top-level domain .TLD, e.g., 'in' for India""") addarg('-l', '--lang', metavar='LANG', help='display in language LANG') addarg('-x', '--exact', action='store_true', help='disable automatic spelling correction') addarg('-C', '--nocolor', dest='colorize', action='store_false', help='disable color output') addarg('--colors', dest='colorstr', type=argparser.is_colorstr, default=colorstr_env if colorstr_env else 'GKlgxy', metavar='COLORS', help='set output colors (see man page for details)') addarg('-j', '--first', '--lucky', dest='lucky', action='store_true', help='open the first result in web browser and exit') addarg('-t', '--time', dest='duration', type=argparser.is_duration, metavar='dN', help='time limit search ' '[h5 (5 hrs), d5 (5 days), w5 (5 weeks), m5 (5 months), y5 (5 years)]') addarg('-w', '--site', dest='sites', action='append', metavar='SITE', help='search a site using Google') addarg('--unfilter', action='store_true', help='do not omit similar results') addarg('-p', '--proxy', default=https_proxy_from_environment(), help="""tunnel traffic through an HTTP proxy; PROXY is of the form [http://][user:password@]proxyhost[:port]""") addarg('--noua', action='store_true', help='disable user agent') addarg('--notweak', action='store_true', help='disable TCP optimizations and forced TLS 1.2') addarg('--json', action='store_true', help='output in JSON format; implies --noprompt') addarg('--url-handler', metavar='UTIL', 
help='custom script or cli utility to open results') addarg('--show-browser-logs', action='store_true', help='do not suppress browser output (stdout and stderr)') addarg('--np', '--noprompt', dest='noninteractive', action='store_true', help='search and exit, do not prompt') addarg('keywords', nargs='*', metavar='KEYWORD', help='search keywords') if ENABLE_SELF_UPGRADE_MECHANISM and not system_is_windows(): addarg('-u', '--upgrade', action='store_true', help='perform in-place self-upgrade') addarg('--include-git', action='store_true', help='when used with --upgrade, upgrade to latest git master') addarg('-v', '--version', action='version', version=_VERSION_) addarg('-d', '--debug', action='store_true', help='enable debugging') addarg('--complete', help=argparse.SUPPRESS) return argparser.parse_args(args, namespace) def main(): global ua try: opts = parse_args() # Set logging level if opts.debug: logger.setLevel(logging.DEBUG) logger.debug('googler version %s', _VERSION_) logger.debug('Python version %s', python_version()) # Handle query completer if opts.complete is not None: completer_run(opts.complete) # Handle self-upgrade if hasattr(opts, 'upgrade') and opts.upgrade: self_upgrade(include_git=opts.include_git) sys.exit(0) check_stdout_encoding() if opts.keywords: try: # Add cmdline args to readline history readline.add_history(' '.join(opts.keywords)) except Exception: pass # Set colors if opts.colorize: colors = Colors(*[COLORMAP[c] for c in opts.colorstr], reset=COLORMAP['x']) else: colors = None Result.colors = colors Result.urlexpand = True if os.getenv('DISABLE_URL_EXPANSION') is None else False GooglerCmd.colors = colors if opts.url_handler is not None: open_url.url_handler = opts.url_handler else: # Set text browser override to False open_url.override_text_browser = False # Handle browser output suppression if opts.show_browser_logs or (os.getenv('BROWSER') in text_browsers): open_url.suppress_browser_output = False else: open_url.suppress_browser_output = 
True if opts.noua: logger.debug('User Agent is disabled') ua = False repl = GooglerCmd(opts) if opts.json or opts.lucky or opts.noninteractive: # Non-interactive mode repl.fetch() if opts.lucky: if repl.results: open_url(repl.results[0].url) else: print('No results.', file=sys.stderr) else: repl.display_results(prelude='', json_output=opts.json) sys.exit(0) else: # Interactive mode repl.cmdloop() except Exception as e: # With debugging on, let the exception through for a traceback; # otherwise, only print the exception error message. if logger.isEnabledFor(logging.DEBUG): raise else: logger.error(e) sys.exit(1) if __name__ == '__main__': main() googler-3.5/googler.1000066400000000000000000000264021324143632700145500ustar00rootroot00000000000000.TH "GOOGLER" "1" "16 Feb 2018" "Version 3.5" "User Commands" .SH NAME googler \- Google from the command-line .SH SYNOPSIS .B googler [OPTIONS] [KEYWORD [KEYWORD ...]] .SH DESCRIPTION .B googler is a command-line tool to search Google (Web & News) from the terminal. Google site search works too. \fBgoogler\fR shows the title, URL and text context for each result. Results are fetched in pages. Next or previous page navigation is possible using keyboard shortcuts. Results are indexed and a result URL can be opened in a browser using the index number. There is no configuration file as aliases serve the same purpose for this utility. Supports sequential searches in a single instance. .PP .B Features .PP * Google Search, Google Site Search, Google News * Fast and clean (no ads, stray URLs or clutter), custom color * Navigate result pages from omniprompt, open URLs in browser * Effortless keyword-based site search with googler @t add-on * Search and option completion scripts for Bash, Zsh and Fish * Fetch n results in a go, start at the nth result * Disable automatic spelling correction and search exact keywords * Specify duration, country/domain (default: worldwide/.com), language * Google keywords (e.g. 
\fIfiletype:mime\fR, \fIsite:somesite.com\fR) support * Open the first result directly in browser (as in I'm Feeling Lucky) * Non-stop searches: fire new searches at omniprompt without exiting * HTTPS proxy, User Agent, TLS 1.2 (default) support * Comprehensive documentation, man page with handy usage examples * Minimal dependencies .SH OPTIONS .TP .BI "-h, --help" Show help text and exit. .TP .BI "-s, --start=" N Start at the \fIN\fRth result. .TP .BI "-n, --count=" N Show \fIN\fR results (default 10). .TP .BI "-N, --news" Show results from news section. .TP .BI "-c, --tld=" TLD Country-specific search with top-level domain \fI.TLD\fR, e.g., \fBin\fR for India. .TP .BI "-l, --lang=" LANG Search for the language \fILANG\fR, e.g., \fBfi\fR for Finnish. .TP .B "-x, --exact" Disable automatic spelling correction. Search exact keywords. .TP .B "-C, --nocolor" Disable color output. .TP .BI "--colors=" COLORS Set output colors. Refer to the \fBCOLORS\fR section below for details. .TP .B "-j, --first, --lucky" Open the first result in a web browser; implies \fB--noprompt\fR. Feeling Lucky? .TP .BI "-t, --time=" dN Time limit search [h5 (5 hrs), d5 (5 days), w5 (5 weeks), m5 (5 months), y5 (5 years)]. .TP .BI "-w, --site=" SITE Search a site using Google. .TP .BI "--unfilter" Do not omit similar results. .TP .BI "-p, --proxy=" PROXY Tunnel traffic through an HTTP proxy. \fIPROXY\fR is of the form \fI[http://][user:password@]proxyhost[:port]\fR. The proxy server must support HTTP CONNECT tunneling and must not block port 443 for the relevant Google hosts. If a proxy is not explicitly given, the \fIhttps_proxy\fR environment variable (if available) is used instead. .TP .BI "--noua" Disable user agent. Results are fetched faster. .TP .BI "--notweak" Disable TCP optimizations. Negotiate Transport Layer Security protocol instead of forcing TLS 1.2 (on Python 3.4 and above). Should be used only in case of connection issues. 
.TP .BI "--json" Output in JSON format; implies \fB--noprompt\fR. .TP .BI "--url-handler=" UTIL Custom script or command-line utility to open urls with. .TP .BI "--show-browser-logs" Do not suppress browser output when opening result in browser; that is, connect stdout and stderr of the browser to googler's stdout and stderr instead of /dev/null. By default, browser output is suppressed (due to certain graphical browsers spewing messages to console) unless the \fBBROWSER\fR environment variable is a known text-based browser: elinks, links, lynx, w3m or www-browser. .TP .BI "--np, --noprompt" Perform search and exit; do not prompt for further interactions. .TP .BI "-u, --upgrade" Perform in-place self-upgrade. By default, the latest stable version is used. However, the latest git master is used instead if \fB--include-git\fR is also supplied. This mechanism is not available on Windows (including Cygwin), and if you installed \fBgoogler\fR with a package manager, this mechanism may have been disabled by your packager at packaging or install time. .TP .BI "--include-git" See \fB--upgrade\fR. .TP .BI "-v, --version" Show version number and exit. .TP .BI "-d, --debug" Enable debugging. .SH OMNIPROMPT KEYS .TP .BI "n, p" Fetch the next or previous set of search results. .TP .BI "index" Open the result corresponding to index in browser. .TP .BI "f" Jump to the first page. .TP .BI o " [index|range|a ...]" Open space-separated result indices, numeric ranges (sitelinks unsupported in ranges) or all indices, if 'a' is specified, in the browser. Open the current search in the browser, if no arguments. .TP .BI O " [index|range|a ...]" Works similar to key 'o', but tries to ignore text-based browsers (even if BROWSER is set) and open links in a GUI browser. .TP .BI g " keywords" Initiate a new Google search for \fIkeywords\fR with original options. This key should be used to search omniprompt keys (including itself) and indices. .TP .BI "c index" Copy url to clipboard. 
.TP .BI "u" Toggle url expansion. .TP .BI "q, ^D, double Enter" Exit googler. .TP .BI "?" Show omniprompt help. .TP .BI * Any other string initiates a new search with original options. .SH GOOGLER @T \fBgoogler @t\fR is a convenient add-on to Google Site Search with unique keywords. While \fBgoogler\fR has an integrated option to search a site, it could be simplified further with aliases. The file \fIgoogler_at\fR (https://github.com/jarun/googler/blob/master/auto-completion/googler_at/googler_at) contains a list of website search aliases. To source it, run: .PP .IP "" 4 .B source googler_at .PP or .PP .IP "" 4 .B . googler_at .PP With \fBgoogler @t\fR, the following command searches Wikipedia for \fIhexspeak\fR: .PP .IP "" 4 .B @w hexspeak .PP Other \fBgoogler\fR options can be combined. The shell can be configured to be source the file at start-up for further convenience. .PP All the aliases start with the \fB@\fR symbol (hence the name \fBgoogler @t\fR) and there is minimum chance they will conflict with any shell commands. Users can add new aliases to the file. .SH COLORS \fBgoogler\fR allows you to customize the color scheme via a six-letter string, reminiscent of BSD \fBLSCOLORS\fR. The six letters represent the colors of .IP - 2 indices .PD 0 \" Change paragraph spacing to 0 in the list .IP - 2 titles .IP - 2 URLs .IP - 2 metadata/publishing info (Google News only) .IP - 2 abstracts .IP - 2 prompts .PD 1 \" Restore paragraph spacing .TP respectively. The six-letter string is passed in either as the argument to the \fB--colors\fR option, or as the value of the environment variable \fBGOOGLER_COLORS\fR. .TP We offer the following colors/styles: .TS tab(;) box; l|l -|- l|l. 
Letter;Color/Style a;black b;red c;green d;yellow e;blue f;magenta g;cyan h;white i;bright black j;bright red k;bright green l;bright yellow m;bright blue n;bright magenta o;bright cyan p;bright white A-H;bold version of the lowercase-letter color I-P;bold version of the lowercase-letter bright color x;normal X;bold y;reverse video Y;bold reverse video .TE .TP .TP The default colors string is \fIGKlgxy\fR, which stands for .IP - 2 bold bright cyan indices .PD 0 \" Change paragraph spacing to 0 in the list .IP - 2 bold bright green titles .IP - 2 bright yellow URLs .IP - 2 cyan metadata/publishing info .IP - 2 normal abstracts .IP - 2 reverse video prompts .PD 1 \" Restore paragraph spacing .TP Note that .IP - 2 Bright colors (implemented as \\x1b[90m - \\x1b[97m) may not be available in all color-capable terminal emulators; .IP - 2 Some terminal emulators draw bold text in bright colors instead; .IP - 2 Some terminal emulators only distinguish between bold and bright colors via a default-off switch. .TP Please consult the manual of your terminal emulator as well as \fIhttps://en.wikipedia.org/wiki/ANSI_escape_code\fR for details. .SH ENVIRONMENT .TP .BI BROWSER Overrides the default browser. Ref: .I http://docs.python.org/library/webbrowser.html .TP .BI GOOGLER_COLORS Refer to the \fBCOLORS\fR section. .TP .BI DISABLE_PROMPT_COLOR Force a plain omniprompt if you are facing issues with colors at the prompt. .TP .BI https_proxy Refer to the \fB--proxy\fR option. .TP .BI DISABLE_URL_EXPANSION Show the domain names in search results instead of the expanded URL. .SH EXAMPLES .PP .IP 1. 4 Google \fBhello world\fR: .PP .EX .IP .B googler hello world .EE .PP .IP 2. 4 Fetch \fB15 results\fR updated within the last \fB14 months\fR, starting from the \fB3rd result\fR for the keywords \fBjungle book\fR in \fBsite\fR imdb.com: .PP .EX .IP .B googler -n 15 -s 3 -t m14 -w imdb.com jungle book .EE .PP .IP 3. 
4 Read recent \fBnews\fR on gadgets: .PP .EX .IP .B googler -N gadgets .EE .PP .IP 4. 4 Fetch results on IPL cricket from \fBGoogle India\fR server in \fBEnglish\fR: .PP .EX .IP .B googler -c in -l en IPL cricket .EE .PP .IP 5. 4 Search \fBquoted text\fR: .PP .EX .IP .B googler it\(rs's a \(rs\(dqbeautiful world\(rs\(dq in spring .EE .PP .IP 6. 4 Search for a \fBspecific file type\fR: .PP .EX .IP .B googler instrumental filetype:mp3 .EE .PP .IP 7. 4 Disable \fBautomatic spelling correction\fR, e.g. fetch results for \fIgoogler\fR instead of \fIgoogle\fR: .PP .EX .IP .B googler -x googler .EE .PP .IP 8. 4 \fBI'm feeling lucky\fR search: .PP .EX .IP .B googler -j leather jackets .EE .PP .IP 9. 4 \fBWebsite specific\fR search: .PP .EX .IP .B googler -w amazon.com -w ebay.com digital camera .EE .PP .IP "" 4 Site specific search continues at omniprompt. .EE .PP .IP 10. 4 Alias to find \fBdefinitions of words\fR: .PP .EX .IP .B alias define='googler -n 2 define' .EE .PP .IP 11. 4 Look up \fBn\fR, \fBp\fR, \fBo\fR, \fBO\fR, \fBq\fR, \fBg keywords\fR or a result index at the \fBomniprompt\fR: as the omniprompt recognizes these keys or index strings as commands, you need to prefix them with \fBg\fR, e.g., .PP .EX .PD 0 .IP .B g n .IP .B g g keywords .IP .B g 1 .PD .EE .PP .IP 12. 4 Input and output \fBredirection\fR: .PP .EX .IP .B googler -C hello world < input > output .EE .PP .IP "" 4 Note that \fI-C\fR is required to avoid printing control characters (for colored output). .IP 13. 4 \fBPipe\fR output: .PP .EX .IP .B googler -C hello world | tee output .EE .IP 14. 4 Use a \fBcustom color scheme\fR, e.g., one warm color scheme designed for Solarized Dark: .PP .EX .IP .B googler --colors bjdxxy google .IP .B GOOGLER_COLORS=bjdxxy googler google .EE .IP 15. 
4 Tunnel traffic through an \fBHTTPS proxy\fR, e.g., a local Privoxy instance listening on port 8118: .PP .EX .IP .B googler --proxy localhost:8118 google .EE .PP .IP "" 4 By default the environment variable \fIhttps_proxy\fR is used, if defined. .IP 16. 4 Quote multiple search keywords to auto-complete (using completion script): .PP .EX .IP .B googler 'hello w .EE .SH AUTHORS Henri Hakkinen .br Arun Prakash Jana .br Zhiming Wang .SH HOME .I https://github.com/jarun/googler .SH REPORTING BUGS .I https://github.com/jarun/googler/issues .SH LICENSE Copyright \(co 2008 Henri Hakkinen .br Copyright \(co 2015-2018 Arun Prakash Jana .PP License GPLv3+: GNU GPL version 3 or later . .br This is free software: you are free to change and redistribute it. There is NO WARRANTY, to the extent permitted by law. googler-3.5/googler.svg000066400000000000000000000527051324143632700152140ustar00rootroot00000000000000 googler-light Created with Sketch. googler-3.5/packagecore.yaml000066400000000000000000000015231324143632700161550ustar00rootroot00000000000000name: googler maintainer: Arun Prakash Jana license: GPLv3 summary: Google from the command-line. 
homepage: https://github.com/jarun/googler commands: install: - make PREFIX="/usr" install DESTDIR="${BP_DESTDIR}" packages: archlinux: builddeps: - make deps: - python centos7.3: builddeps: - make deps: - python commands: pre: - yum install epel-release debian9: builddeps: - make deps: - python3 fedora26: builddeps: - make deps: - python3 fedora27: builddeps: - make deps: - python3 opensuse42.3: builddeps: - make deps: - python3 ubuntu16.04: builddeps: - make deps: - python3 ubuntu17.10: builddeps: - make deps: - python3 googler-3.5/tests/000077500000000000000000000000001324143632700141665ustar00rootroot00000000000000googler-3.5/tests/ci-test-wrapper000077500000000000000000000056431324143632700171520ustar00rootroot00000000000000#!/usr/bin/env bash set -e declare here repo_root test_script here="$(perl -e 'use File::Basename; use Cwd "abs_path"; print dirname(abs_path(@ARGV[0]));' -- "$0")" repo_root="$here/.." test_script="$here/test" export GIT_DIR="$here/../.git" declare -a watchlist watchlist=(googler tests) while [[ $1 == -* ]]; do case $1 in -h|--help) cat <<'EOF' Usage: ci-test-wrapper [-h|--help] [--monitor PATH [PATH ...]] googler(1) testing wrapper for CIs. Options: -h, --help Print this help and exit. --watch PATH [PATH ...] Additional paths (relative to repository root) to watch. Only run tests when watched paths have been modified. By default only googler and tests/ are watched, but sometimes additional paths should be watched depending on circumstances, e.g., for Travis, .travis.yml should also be watched. Note that this option consumes all of the remaining command line arguments. EOF exit 1 ;; --watch) shift watchlist=( "${watchlist[@]}" "$@" ) shift $# break ;; *) printf '\033[31mError: Unrecognized option %q.\033[0m\n' "$1" >&2 exit 1 ;; esac shift done (( $# > 0 )) && { printf '\033[31mError: Unrecognized arguments %s.\033[0m\n' "$*" >&2 exit 1 } # Abort if the CI_SKIP_TEST environment variable is detected. 
if [[ -n $CI_SKIP_TEST ]]; then printf 'Detected $CI_SKIP_TEST. Skipping tests.' >&2 exit fi # Diff HEAD against a base commit to see if the changes are worth # testing. (This check is skipped entirely if the CI_FORCE_TEST environment # variable is set and non-nil.) # # * For a regular branch, diff against HEAD^; # * For a PR branch, diff against the merge base of HEAD and master. # # Currently we use $TRAVIS_PULL_REQUEST to determine whether we're building a # PR branch. Other criteria may be added if we ever expand to other CIs. if [[ -z $CI_FORCE_TEST ]]; then printf 'We are watching the following paths:\n' >&2 printf ' - %s\n' "${watchlist[@]}" >&2 printf '\n' >&2 declare diff_commits diff if [[ -z ${TRAVIS_PULL_REQUEST+x} || $TRAVIS_PULL_REQUEST == false ]]; then diff_commits='HEAD^..HEAD' else diff_commits='master...HEAD' fi diff=$(git -C "$repo_root" diff "$diff_commits" -- "${watchlist[@]}") if [[ -z $diff ]]; then printf 'None of the watchlist items changed, skipping tests.\n' >&2 printf 'You may set the $CI_FORCE_TEST environment variable to force testing.\n' >&2 exit 0 else printf 'Changes to watchlist item(s) detected. Will test.\n\n' >&2 fi else printf 'Detected $CI_FORCE_TEST. Skipping necessity checks.\n\n' >&2 fi # Test googler(1) with $repo_root at the beginning of $PATH (so that googler # from this repo is picked up). 
PATH="$repo_root:$PATH" "$test_script" --ci googler-3.5/tests/googler.py000077700000000000000000000000001324143632700177652../googlerustar00rootroot00000000000000googler-3.5/tests/parse000077500000000000000000000015761324143632700152370ustar00rootroot00000000000000#!/usr/bin/env python3 """Parse saved responses with GoogleParser.""" import argparse import json import googler def main(): argparser = argparse.ArgumentParser(description='Parse Google responses.') argparser.add_argument('-N', '--news', action='store_true', help='parse as Google News responses') argparser.add_argument('files', nargs='+', metavar='FILE', help="HTML file with Google's response body") args = argparser.parse_args() for fn in args.files: with open(fn, encoding='utf-8') as fp: htmlparser = googler.GoogleParser(news=args.news) htmlparser.feed(fp.read()) results_object = [r.jsonizable_object() for r in htmlparser.results] print(json.dumps(results_object, indent=2, sort_keys=True, ensure_ascii=False)) if __name__ == '__main__': main() googler-3.5/tests/test000077500000000000000000000112521324143632700150740ustar00rootroot00000000000000#!/usr/bin/env bash set -e declare quiet exitcode quiet=0 ci=0 exitcode=0 while [[ $1 == -* ]]; do case $1 in --ci) ci=1 quiet=1 ;; -h|--help) cat <<'EOF' Usage: test [options] Run automated tests of googler(1). googler(1) is expected on $PATH. Requires shuf(1) from coreutils and /usr/share/dict/words. Options: --ci Same to --quiet, except LF is used instead of CR when printing progress information. The reason is that CI logs are typically line buffered, so CR won't flush the output, rendering progress info useless. -h, --help Print this help and exit. -q, --quiet Suppress googler's output except when a test fails. Some progress info is still printed to stderr. Note that without this option, this script is rather verbose. Environment variables: NUM_TEST_ITERATIONS Number of random tests to run. Default is 100. 
SLEEP_DURATION Number of seconds to sleep after each query. Default is 0. You may want to set this to avoid being blocked by Google for spamming. EOF exit 1 ;; -q|--quiet) quiet=1 ;; *) printf '\033[31mError: Unrecognized option %q.\033[0m\n' "$1" >&2 exit 1 ;; esac shift done [[ $# -gt 0 ]] && { printf '\033[31mError: Unrecognized argument %q.\033[0m\n' "$1" >&2 exit 1 } declare num_rand_words declare -a predefined_wordlist random_wordlist tld_args lang_args # A UTF-8 wordlist. predefined_wordlist=('汉语' 'español' 'português' 'ру́сский язы́к' '日本語' '한국어' 'le français') # Requires shuf(1). command -v shuf &>/dev/null || { printf '\033[31mError: shuf(1) not found.\033[0m\n' >&2 exit 1 } num_rand_words=10 random_wordlist=( $(shuf -n $num_rand_words /usr/share/dict/words 2>/dev/null) ) [[ ${#random_wordlist[@]} == $num_rand_words ]] || { printf '\033[31mError: Problem reading random words from /usr/share/dict/words.\033[0m\n' >&2 exit 1 } # Test googler with the given options, and report error if necessary. # # Whether googler's output is suppressed depends on whether the global variable # quiet is truthy (set by -q, --quiet); when a failure is encountered, the # global variable exitcode is set to 1, and if quiet was set, the test is rerun # with output turned on. test_googler () { report_error () { local last_status=$? declare -g exitcode local rerun=0 [[ $1 == --rerun ]] && { rerun=1 shift } printf '\033[31mError: googler ' >&2 printf '%q ' "$@" >&2 printf 'failed with status %d.\033[0m\n' $last_status >&2 exitcode=1 (( rerun )) && { googler --noprompt -d "$@"; printf '\n\033[33m[Exit status] %d\033[0m\n' $?; } || : } declare -g quiet if (( quiet )); then googler --noprompt -d "$@" &>/dev/null || report_error --rerun "$@" else printf '\033[34m==> googler ' >&2 printf '%q ' "$@" >&2 printf '\033[0m\n' >&2 googler --noprompt -d "$@" || report_error "$@" echo fi } # Write a list of configurations to $config_list, and later randomly pick from # that list. 
# (Only a random sample is run, because Google would block us after
# thousands of queries.)
declare config_list
config_list="$(mktemp)"
# Remove the temporary list on every exit path.
trap 'rm -f "$config_list"' EXIT

# The queries are the same for every TLD/language combination, so build them
# once: every predefined and random word on its own, followed by the random
# words taken two at a time (words 0+1, 2+3, ...).
declare -a queries
queries=("${predefined_wordlist[@]}" "${random_wordlist[@]}")
pair_start=0
while (( pair_start + 1 < num_rand_words )); do
    queries+=("${random_wordlist[pair_start]} ${random_wordlist[pair_start + 1]}")
    pair_start=$(( pair_start + 2 ))
done

# One configuration per line: optional "-c TLD", optional "-l LANG", then the
# query, each field followed by a single space.
for domain in com ar au be br ca ch cz de es 'fi' fr id 'in' it jp kr mx nl ph pl pt ro ru se tw ua uk; do
    # "com" is googler's default TLD, so no -c option is passed for it.
    if [[ $domain == com ]]; then
        tld_args=()
    else
        tld_args=(-c $domain)
    fi
    for language in default de en fr hi ja ko zh; do
        # "default" means: do not pass -l at all.
        if [[ $language == default ]]; then
            lang_args=()
        else
            lang_args=(-l $language)
        fi
        for query in "${queries[@]}"; do
            printf '%s ' "${tld_args[@]}" "${lang_args[@]}" "$query"
            printf '\n'
        done
    done
done >"$config_list"

# Run a random selection of the configurations; the sample size defaults to
# 100 and can be overridden through $NUM_TEST_ITERATIONS.
declare num_rand_configs
num_rand_configs="${NUM_TEST_ITERATIONS:-100}"
test_index=0
while read -r config_line; do
    test_index=$(( test_index + 1 ))
    printf '\033[32mTest %d/%d\033[0m' $test_index $num_rand_configs >&2
    # Interactive quiet mode overwrites the progress line in place (CR);
    # verbose and CI modes print one progress line per test (LF).
    if (( !quiet || ci )); then
        printf '\n' >&2
    else
        printf '\r' >&2
    fi
    # Deliberately unquoted: the saved line must be split back into
    # separate googler arguments.
    test_googler $config_line
    sleep "${SLEEP_DURATION:-0}"
done < <(shuf -n $num_rand_configs "$config_list")

if (( exitcode == 0 )); then
    printf '\033[K\033[32mAll passed.\033[0m\n'
fi
exit $exitcode