pax_global_header00006660000000000000000000000064132505140700014507gustar00rootroot0000000000000052 comment=7adc1a4e70a22ba14ad7a8333d59a35719fbe9e0 django-haystack-2.8.0/000077500000000000000000000000001325051407000145655ustar00rootroot00000000000000django-haystack-2.8.0/.gitchangelog.rc000066400000000000000000000141321325051407000176250ustar00rootroot00000000000000## ## Format ## ## ACTION: [AUDIENCE:] COMMIT_MSG [!TAG ...] ## ## Description ## ## ACTION is one of 'chg', 'fix', 'new' ## ## Is WHAT the change is about. ## ## 'chg' is for refactor, small improvement, cosmetic changes... ## 'fix' is for bug fixes ## 'new' is for new features, big improvement ## ## AUDIENCE is optional and one of 'dev', 'usr', 'pkg', 'test', 'doc' ## ## Is WHO is concerned by the change. ## ## 'dev' is for developpers (API changes, refactors...) ## 'usr' is for final users (UI changes) ## 'pkg' is for packagers (packaging changes) ## 'test' is for testers (test only related changes) ## 'doc' is for doc guys (doc only changes) ## ## COMMIT_MSG is ... well ... the commit message itself. ## ## TAGs are additionnal adjective as 'refactor' 'minor' 'cosmetic' ## ## They are preceded with a '!' or a '@' (prefer the former, as the ## latter is wrongly interpreted in github.) Commonly used tags are: ## ## 'refactor' is obviously for refactoring code only ## 'minor' is for a very meaningless change (a typo, adding a comment) ## 'cosmetic' is for cosmetic driven change (re-indentation, 80-col...) ## 'wip' is for partial functionality but complete subfunctionality. ## ## Example: ## ## new: usr: support of bazaar implemented ## chg: re-indentend some lines !cosmetic ## new: dev: updated code to be compatible with last version of killer lib. ## fix: pkg: updated year of licence coverage. ## new: test: added a bunch of test around user usability of feature X. ## fix: typo in spelling my name in comment. 
!minor ## ## Please note that multi-line commit message are supported, and only the ## first line will be considered as the "summary" of the commit message. So ## tags, and other rules only applies to the summary. The body of the commit ## message will be displayed in the changelog without reformatting. ## ## ``ignore_regexps`` is a line of regexps ## ## Any commit having its full commit message matching any regexp listed here ## will be ignored and won't be reported in the changelog. ## ignore_regexps = [ r'@minor', r'!minor', r'@cosmetic', r'!cosmetic', r'@refactor', r'!refactor', r'@wip', r'!wip', r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[p|P]kg:', r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*[d|D]ev:', r'^(.{3,3}\s*:)?\s*[fF]irst commit.?\s*$', ] ## ``section_regexps`` is a list of 2-tuples associating a string label and a ## list of regexp ## ## Commit messages will be classified in sections thanks to this. Section ## titles are the label, and a commit is classified under this section if any ## of the regexps associated is matching. ## section_regexps = [ ('New', [ r'^[nN]ew\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', ]), ('Changes', [ r'^[cC]hg\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', ]), ('Fix', [ r'^[fF]ix\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n]*)$', ]), ('Other', None ## Match all lines ), ] ## ``body_process`` is a callable ## ## This callable will be given the original body and result will ## be used in the changelog. ## ## Available constructs are: ## ## - any python callable that take one txt argument and return txt argument. ## ## - ReSub(pattern, replacement): will apply regexp substitution. ## ## - Indent(chars=" "): will indent the text with the prefix ## Please remember that template engines gets also to modify the text and ## will usually indent themselves the text if needed. ## ## - Wrap(regexp=r"\n\n"): re-wrap text in separate paragraph to fill 80-Columns ## ## - noop: do nothing ## ## - ucfirst: ensure the first letter is uppercase. 
## (usually used in the ``subject_process`` pipeline) ## ## - final_dot: ensure text finishes with a dot ## (usually used in the ``subject_process`` pipeline) ## ## - strip: remove any spaces before or after the content of the string ## ## Additionally, you can `pipe` the provided filters, for instance: #body_process = Wrap(regexp=r'\n(?=\w+\s*:)') | Indent(chars=" ") #body_process = Wrap(regexp=r'\n(?=\w+\s*:)') #body_process = noop body_process = ReSub(r'((^|\n)[A-Z]\w+(-\w+)*: .*(\n\s+.*)*)+$', r'') | strip ## ``subject_process`` is a callable ## ## This callable will be given the original subject and result will ## be used in the changelog. ## ## Available constructs are those listed in ``body_process`` doc. subject_process = (strip | ReSub(r'^([cC]hg|[fF]ix|[nN]ew)\s*:\s*((dev|use?r|pkg|test|doc)\s*:\s*)?([^\n@]*)(@[a-z]+\s+)*$', r'\4') | ucfirst | final_dot) ## ``tag_filter_regexp`` is a regexp ## ## Tags that will be used for the changelog must match this regexp. ## tag_filter_regexp = r'^v[0-9]+\.[0-9]+(\.[0-9]+)?$' ## ``unreleased_version_label`` is a string ## ## This label will be used as the changelog Title of the last set of changes ## between last valid tag and HEAD if any. unreleased_version_label = "%%version%% (unreleased)" ## ``output_engine`` is a callable ## ## This will change the output format of the generated changelog file ## ## Available choices are: ## ## - rest_py ## ## Legacy pure python engine, outputs ReSTructured text. ## This is the default. ## ## - mustache() ## ## Template name could be any of the available templates in ## ``templates/mustache/*.tpl``. ## Requires python package ``pystache``. ## Examples: ## - mustache("markdown") ## - mustache("restructuredtext") ## ## - makotemplate() ## ## Template name could be any of the available templates in ## ``templates/mako/*.tpl``. ## Requires python package ``mako``. 
## Examples: ## - makotemplate("restructuredtext") ## output_engine = rest_py #output_engine = mustache("restructuredtext") #output_engine = mustache("markdown") #output_engine = makotemplate("restructuredtext") ## ``include_merge`` is a boolean ## ## This option tells git-log whether to include merge commits in the log. ## The default is to include them. include_merge = True django-haystack-2.8.0/.github/000077500000000000000000000000001325051407000161255ustar00rootroot00000000000000django-haystack-2.8.0/.github/issue_template.md000066400000000000000000000004671325051407000215010ustar00rootroot00000000000000* [ ] Tested with the latest Haystack release * [ ] Tested with the current Haystack master branch ## Expected behaviour ## Actual behaviour ## Steps to reproduce the behaviour 1. ## Configuration * Operating system version: * Search engine version: * Python version: * Django version: * Haystack version:django-haystack-2.8.0/.github/pull_request_template.md000066400000000000000000000010151325051407000230630ustar00rootroot00000000000000# Hey, thanks for contributing to Haystack. Please review [the contributor guidelines](https://django-haystack.readthedocs.io/en/latest/contributing.html) and confirm that [the tests pass](https://django-haystack.readthedocs.io/en/latest/running_tests.html) with at least one search engine. # Once your pull request has been submitted, the full test suite will be executed on https://travis-ci.org/django-haystack/django-haystack/pull_requests. 
Pull requests with passing tests are far more likely to be reviewed and merged.django-haystack-2.8.0/.gitignore000066400000000000000000000003041325051407000165520ustar00rootroot00000000000000.settings *.pyc .DS_Store _build .*.sw[po] *.egg-info dist build MANIFEST .tox env env3 *.egg .eggs .coverage .idea # Build artifacts from test setup *.tgz test_haystack/solr_tests/server/solr4/ django-haystack-2.8.0/.isort.cfg000066400000000000000000000001211325051407000164560ustar00rootroot00000000000000[settings] line_length=110 known_first_party=haystack default_section=THIRDPARTY django-haystack-2.8.0/.travis.yml000066400000000000000000000043051325051407000167000ustar00rootroot00000000000000sudo: required dist: trusty language: python python: - 2.7 - 3.4 - 3.5 - 3.6 - pypy cache: apt: true pip: true directories: - $HOME/download-cache jdk: - oraclejdk8 addons: apt_packages: - binutils - default-jdk - gdal-bin - libgdal1h - libgeos-c1 - libproj-dev - libxapian22 - python-xapian - wajig before_install: - mkdir -p $HOME/download-cache # See https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html#deb-repo - wget -qO - https://packages.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add - - > if [[ $VERSION_ES == '>=2.0.0,<3.0.0' ]]; then echo "deb http://packages.elastic.co/elasticsearch/2.x/debian stable main" | sudo tee -a /etc/apt/sources.list.d/elasticsearch-2.x.list sudo apt-get update sudo apt-get -y --allow-downgrades install elasticsearch=2.4.5 else echo "deb http://packages.elastic.co/elasticsearch/1.7/debian stable main" | sudo tee -a /etc/apt/sources.list.d/elasticsearch-1.7.list sudo apt-get update sudo apt-get -y --allow-downgrades install elasticsearch=1.7.6 fi - sudo service elasticsearch restart install: - pip install --upgrade setuptools - pip install requests "Django${DJANGO_VERSION}" "elasticsearch${VERSION_ES}" - python setup.py clean build install before_script: - BACKGROUND_SOLR=true 
test_haystack/solr_tests/server/start-solr-test-server.sh script: - python test_haystack/solr_tests/server/wait-for-solr - python setup.py test env: matrix: - DJANGO_VERSION=">=1.11,<2.0" VERSION_ES=">=1.0.0,<2.0.0" - DJANGO_VERSION=">=2.0,<2.1" VERSION_ES=">=1.0.0,<2.0.0" - DJANGO_VERSION=">=1.11,<2.0" VERSION_ES=">=2.0.0,<3.0.0" - DJANGO_VERSION=">=2.0,<2.1" VERSION_ES=">=2.0.0,<3.0.0" matrix: allow_failures: - python: 'pypy' exclude: - python: 2.7 env: DJANGO_VERSION=">=2.0,<2.1" VERSION_ES=">=2.0.0,<3.0.0" - python: 2.7 env: DJANGO_VERSION=">=2.0,<2.1" VERSION_ES=">=1.0.0,<2.0.0" notifications: irc: "irc.freenode.org#haystack" email: false django-haystack-2.8.0/AUTHORS000066400000000000000000000171601325051407000156420ustar00rootroot00000000000000Primary Authors: * Daniel Lindsley * Matt Croydon (some documentation, sanity checks and the sweet name) * Travis Cline (the original SQ implementation, improvements to ModelSearchIndex) * David Sauve (notanumber) for the Xapian backend, the simple backend and various patches. * Jannis Leidel (jezdez) * Chris Adams (acdha) * Justin Caratzas (bigjust) * Andrew Schoen (andrewschoen) * Dan Watson (dcwatson) * Matt Woodward (mpwoodward) * Alex Vidal (avidal) * Zach Smith (zmsmith) * Stefan Wehrmeyer (stefanw) * George Hickman (ghickman) * Ben Spaulding (benspaulding) Thanks to * Jacob Kaplan-Moss & Joseph Kocherhans for the original implementation of djangosearch, of which portions were used, as well as basic API feedback. * Christian Metts for designing the logo and building a better site. * Nathan Borror for testing and advanced form usage. * Malcolm Tredinnick for API feedback. * Mediaphormedia for funding the development on More Like This and faceting. * Travis Cline for API feedback, Git help and improvements to the reindex command. * Brian Rosner for various patches. * Richard Boulton for feedback and suggestions. * Cyberdelia for feedback and patches. * Ask Solem for for patching the setup.py. 
* Ben Spaulding for feedback and documentation patches. * smulloni for various patches. * JoeGermuska for various patches. * SmileyChris for various patches. * sk1p for various patches. * Ryszard Szopa (ryszard) for various patches. * Patryk Zawadzki (patrys) for various patches and feedback. * Frank Wiles for documentation patches. * Chris Adams (acdha) for various patches. * Kyle MacFarlane for various patches. * Alex Gaynor (alex) for help with handling deferred models with More Like This. * RobertGawron for a patch to the Highlighter. * Simon Willison (simonw) for various proposals and patches. * Ben Firshman (bfirsh) for faceting improvements and suggestions. * Peter Bengtsson for a patch regarding passing a customized site. * Sam Bull (osirius) for a patch regarding initial data on SearchForms. * slai for a patch regarding Whoosh and fetching all documents of a certain model type. * alanwj for a patch regarding Whoosh and empty MultiValueFields. * alanzoppa for a patch regarding highlighting. * piquadrat for a patch regarding the more_like_this template tag. * dedsm for a patch regarding the pickling of SearchResult objects. * EmilStenstrom for a patch to the Highlighter. * symroe for a patch regarding the more_like_this template tag. * ghostrocket for a patch regarding the simple backend. * Rob Hudson (robhudson) for improvements to the admin search. * apollo13 for simplifying ``SearchForm.__init__``. * Carl Meyer (carljm) for a patch regarding character primary keys. * oyiptong for a patch regarding pickling. * alfredo for a patch to generate epub docs. * Luke Hatcher (lukeman) for documentation patches. * Trey Hunner (treyhunner) for a Whoosh field boosting patch. * Kent Gormat of Retail Catalyst for funding the development of multiple index support. * Gidsy for funding the initial geospatial implementation * CMGdigital for funding the development on: * a multiprocessing-enabled version of ``update_index``. 
* the addition of ``--start/--end`` options in ``update_index``. * the ability to specify both apps & models to ``update_index``. * A significant portion of the geospatial feature. * A significant portion of the input types feature. * Aram Dulyan (Aramgutang) for fixing the included admin class to be Django 1.4 compatible. * Honza Kral (HonzaKral) for various Elasticsearch tweaks & testing. * Alex Vidal (avidal) for a patch allowing developers to override the queryset used for update operations. * Igor Támara (ikks) for a patch related to Unicode ``verbose_name_plural``. * Dan Helfman (witten) for a patch related to highlighting. * Matt DeBoard for refactor of ``SolrSearchBackend.search`` method to allow simpler extension of the class. * Rodrigo Guzman (rz) for a fix to query handling in the ``simple`` backend. * Martin J. Laubach (mjl) for fixing the logic used when combining querysets * Eric Holscher (ericholscher) for a docs fix. * Erik Rose (erikrose) for a quick pyelasticsearch-compatibility patch * Stefan Wehrmeyer (stefanw) for a simple search filter fix * Dan Watson (dcwatson) for various patches. * Andrew Schoen (andrewschoen) for the addition of ``HAYSTACK_IDENTIFIER_METHOD`` * Pablo SEMINARIO (pabluk) for a docs fix, and a fix in the ElasticSearch backend. * Eric Thurgood (ethurgood) for a import fix in the Elasticssearch backend. * Revolution Systems & The Python Software Foundation for funding a significant portion of the port to Python 3! * Artem Kostiuk (postatum) for patch allowing to search for slash character in ElasticSearch since Lucene 4.0. * Luis Barrueco (luisbarrueco) for a simple fix regarding updating indexes using multiple backends. 
* Szymon Teżewski (jasisz) for an update to the bounding-box calculation for spatial queries * Chris Wilson (qris) and Orlando Fiol (overflow) for an update allowing the use of multiple order_by() fields with Whoosh as long as they share a consistent sort direction * Steven Skoczen (@skoczen) for an ElasticSearch bug fix * @Xaroth for updating the app loader to be compatible with Django 1.7 * Jaroslav Gorjatsev (jarig) for a bugfix with index_fieldname * Dirk Eschler (@deschler) for app loader Django 1.7 compatibility fixes * Wictor (wicol) for a patch improving the error message given when model_attr references a non-existent field * Pierre Dulac (dulaccc) for a patch updating distance filters for ElasticSearch 1.x * Andrei Fokau (andreif) for adding support for ``SQ`` in ``SearchQuerySet.narrow()`` * Phill Tornroth (phill-tornroth) for several patches improving UnifiedIndex and ElasticSearch support * Philippe Luickx (philippeluickx) for documenting how to provide backend-specific facet options * Felipe Prenholato (@chronossc) for a patch making it easy to exclude documents from indexing using custom logic * Alfredo Armanini (@phingage) for a patch fixing compatibility with database API changes in Django 1.8 * Ben Spaulding (@benspaulding) for many updates for Django 1.8 support * Troy Grosfield (@troygrosfield) for fixing the test runner for Django 1.8 * Ilan Steemers (@Koed00) for fixing Django 1.9 deprecation warnings * Ana Carolina (@anacarolinats) and Steve Bussetti (@sbussetti) for adding the ``fuzzy`` operator to SearchQuerySet * Tadas Dailyda (@skirsdeda) for various patches * Craig de Stigter (@craigds) for a patch fixing concurrency issues when building UnifiedIndex * Claude Paroz (@claudep) for Django 1.9 support * Chris Brooke (@chrisbrooke) for patching around a backwards-incompatible change in ElasticSearch 2 * Gilad Beeri (@giladbeeri) for adding retries when updating a backend * Arjen Verstoep (@terr) for a patch that allows attribute lookups 
through Django ManyToManyField relationships * Tim Babych (@tymofij) for enabling backend-specific parameters in ``.highlight()`` * Antony Raj (@antonyr) for adding endswith input type and fixing contains input type * Morgan Aubert (@ellmetha) for Django 1.10 support * João Junior (@joaojunior) and Bruno Marques (@ElSaico) for Elasticsearch 2.x support * Alex Tomkins (@tomkins) for various patches * Martin Pauly (@mpauly) for Django 2.0 support django-haystack-2.8.0/CONTRIBUTING.md000066400000000000000000000075401325051407000170240ustar00rootroot00000000000000Contributing ============ Haystack is open-source and, as such, grows (or shrinks) & improves in part due to the community. Below are some guidelines on how to help with the project. Philosophy ---------- * Haystack is BSD-licensed. All contributed code must be either * the original work of the author, contributed under the BSD, or... * work taken from another project released under a BSD-compatible license. * GPL'd (or similar) works are not eligible for inclusion. * Haystack's git master branch should always be stable, production-ready & passing all tests. * Major releases (1.x.x) are commitments to backward-compatibility of the public APIs. Any documented API should ideally not change between major releases. The exclusion to this rule is in the event of either a security issue or to accommodate changes in Django itself. * Minor releases (x.3.x) are for the addition of substantial features or major bugfixes. * Patch releases (x.x.4) are for minor features or bugfixes. Guidelines For Reporting An Issue/Feature ----------------------------------------- So you've found a bug or have a great idea for a feature. Here's the steps you should take to help get it added/fixed in Haystack: * First, check to see if there's an existing issue/pull request for the bug/feature. All issues are at https://github.com/toastdriven/django-haystack/issues and pull reqs are at https://github.com/toastdriven/django-haystack/pulls. 
* If there isn't one there, please file an issue. The ideal report includes: * A description of the problem/suggestion. * How to recreate the bug. * If relevant, including the versions of your: * Python interpreter * Django * Haystack * Search engine used (as well as bindings) * Optionally of the other dependencies involved * Ideally, creating a pull request with a (failing) test case demonstrating what's wrong. This makes it easy for us to reproduce & fix the problem. Github has a great guide for writing an effective pull request: https://github.com/blog/1943-how-to-write-the-perfect-pull-request Instructions for running the tests are at https://django-haystack.readthedocs.io/en/latest/running_tests.html You might also hop into the IRC channel (`#haystack` on `irc.freenode.net`) & raise your question there, as there may be someone who can help you with a work-around. Guidelines For Contributing Code -------------------------------- If you're ready to take the plunge & contribute back some code/docs, the process should look like: * Fork the project on GitHub into your own account. * Clone your copy of Haystack. * Make a new branch in git & commit your changes there. * Push your new branch up to GitHub. * Again, ensure there isn't already an issue or pull request out there on it. If there is & you feel you have a better fix, please take note of the issue number & mention it in your pull request. * Create a new pull request (based on your branch), including what the problem/feature is, versions of your software & referencing any related issues/pull requests. In order to be merged into Haystack, contributions must have the following: * A solid patch that: * is clear. * works across all supported versions of Python/Django. * follows the existing style of the code base (mostly PEP-8). * comments included as needed to explain why the code functions as it does * A test case that demonstrates the previous flaw that now passes with the included patch. 
* If it adds/changes a public API, it must also include documentation for those changes. * Must be appropriately licensed (see [Philosophy](#philosophy)). * Adds yourself to the AUTHORS file. If your contribution lacks any of these things, they will have to be added by a core contributor before being merged into Haystack proper, which may take substantial time for the all-volunteer team to get to. django-haystack-2.8.0/LICENSE000066400000000000000000000031071325051407000155730ustar00rootroot00000000000000Copyright (c) 2009-2013, Daniel Lindsley. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of Haystack nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
--- Prior to April 17, 2009, this software was released under the MIT license. django-haystack-2.8.0/MANIFEST.in000066400000000000000000000001761325051407000163270ustar00rootroot00000000000000recursive-include docs * recursive-include haystack/templates *.xml *.html include AUTHORS include LICENSE include README.rst django-haystack-2.8.0/README.rst000066400000000000000000000041771325051407000162650ustar00rootroot00000000000000======== Haystack ======== :author: Daniel Lindsley :date: 2013/07/28 Haystack provides modular search for Django. It features a unified, familiar API that allows you to plug in different search backends (such as Solr_, Elasticsearch_, Whoosh_, Xapian_, etc.) without having to modify your code. .. _Solr: http://lucene.apache.org/solr/ .. _Elasticsearch: https://www.elastic.co/products/elasticsearch .. _Whoosh: https://bitbucket.org/mchaput/whoosh/ .. _Xapian: http://xapian.org/ Haystack is BSD licensed, plays nicely with third-party app without needing to modify the source and supports advanced features like faceting, More Like This, highlighting, spatial search and spelling suggestions. You can find more information at http://haystacksearch.org/. Getting Help ============ There is a mailing list (http://groups.google.com/group/django-haystack/) available for general discussion and an IRC channel (#haystack on irc.freenode.net). 
Documentation ============= * Development version: http://docs.haystacksearch.org/ * v2.6.X: https://django-haystack.readthedocs.io/en/v2.6.0/ * v2.5.X: https://django-haystack.readthedocs.io/en/v2.5.0/ * v2.4.X: https://django-haystack.readthedocs.io/en/v2.4.1/ * v2.3.X: https://django-haystack.readthedocs.io/en/v2.3.0/ * v2.2.X: https://django-haystack.readthedocs.io/en/v2.2.0/ * v2.1.X: https://django-haystack.readthedocs.io/en/v2.1.0/ * v2.0.X: https://django-haystack.readthedocs.io/en/v2.0.0/ * v1.2.X: https://django-haystack.readthedocs.io/en/v1.2.7/ * v1.1.X: https://django-haystack.readthedocs.io/en/v1.1/ See the `changelog `_ Build Status ============ .. image:: https://travis-ci.org/django-haystack/django-haystack.svg?branch=master :target: https://travis-ci.org/django-haystack/django-haystack Requirements ============ Haystack has a relatively easily-met set of requirements. * Python 2.7+ or Python 3.3+ * A supported version of Django: https://www.djangoproject.com/download/#supported-versions Additionally, each backend has its own requirements. You should refer to https://django-haystack.readthedocs.io/en/latest/installing_search_engines.html for more details. django-haystack-2.8.0/docs/000077500000000000000000000000001325051407000155155ustar00rootroot00000000000000django-haystack-2.8.0/docs/Makefile000066400000000000000000000046741325051407000171700ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d _build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
.PHONY: help clean html web pickle htmlhelp latex changes linkcheck help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " changes to make an overview over all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" clean: -rm -rf _build/* html: mkdir -p _build/html _build/doctrees $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) _build/html @echo @echo "Build finished. The HTML pages are in _build/html." pickle: mkdir -p _build/pickle _build/doctrees $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) _build/pickle @echo @echo "Build finished; now you can process the pickle files." web: pickle json: mkdir -p _build/json _build/doctrees $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) _build/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: mkdir -p _build/htmlhelp _build/doctrees $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) _build/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in _build/htmlhelp." latex: mkdir -p _build/latex _build/doctrees $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) _build/latex @echo @echo "Build finished; the LaTeX files are in _build/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." changes: mkdir -p _build/changes _build/doctrees $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) _build/changes @echo @echo "The overview file is in _build/changes." linkcheck: mkdir -p _build/linkcheck _build/doctrees $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) _build/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in _build/linkcheck/output.txt." 
epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) _build/epub @echo @echo "Build finished. The epub file is in _build/epub." django-haystack-2.8.0/docs/_build/000077500000000000000000000000001325051407000167535ustar00rootroot00000000000000django-haystack-2.8.0/docs/_build/.gitignore000066400000000000000000000000001325051407000207310ustar00rootroot00000000000000django-haystack-2.8.0/docs/_static/000077500000000000000000000000001325051407000171435ustar00rootroot00000000000000django-haystack-2.8.0/docs/_static/.gitignore000066400000000000000000000000001325051407000211210ustar00rootroot00000000000000django-haystack-2.8.0/docs/_templates/000077500000000000000000000000001325051407000176525ustar00rootroot00000000000000django-haystack-2.8.0/docs/_templates/.gitignore000066400000000000000000000000001325051407000216300ustar00rootroot00000000000000django-haystack-2.8.0/docs/admin.rst000066400000000000000000000024531325051407000173430ustar00rootroot00000000000000.. _ref-admin: =================== Django Admin Search =================== Haystack comes with a base class to support searching via Haystack in the Django admin. To use Haystack to search, inherit from ``haystack.admin.SearchModelAdmin`` instead of ``django.contrib.admin.ModelAdmin``. For example:: from haystack.admin import SearchModelAdmin from .models import MockModel class MockModelAdmin(SearchModelAdmin): haystack_connection = 'solr' date_hierarchy = 'pub_date' list_display = ('author', 'pub_date') admin.site.register(MockModel, MockModelAdmin) You can also specify the Haystack connection used by the search with the ``haystack_connection`` property on the model admin class. If not specified, the default connection will be used. 
If you already have a base model admin class you use, there is also a mixin you can use instead:: from django.contrib import admin from haystack.admin import SearchModelAdminMixin from .models import MockModel class MyCustomModelAdmin(admin.ModelAdmin): pass class MockModelAdmin(SearchModelAdminMixin, MyCustomModelAdmin): haystack_connection = 'solr' date_hierarchy = 'pub_date' list_display = ('author', 'pub_date') admin.site.register(MockModel, MockModelAdmin) django-haystack-2.8.0/docs/architecture_overview.rst000066400000000000000000000032501325051407000226570ustar00rootroot00000000000000.. _ref-architecture-overview: ===================== Architecture Overview ===================== ``SearchQuerySet`` ------------------ One main implementation. * Standard API that loosely follows ``QuerySet`` * Handles most queries * Allows for custom "parsing"/building through API * Dispatches to ``SearchQuery`` for actual query * Handles automatically creating a query * Allows for raw queries to be passed straight to backend. ``SearchQuery`` --------------- Implemented per-backend. * Method for building the query out of the structured data. * Method for cleaning a string of reserved characters used by the backend. Main class provides: * Methods to add filters/models/order-by/boost/limits to the search. * Method to perform a raw search. * Method to get the number of hits. * Method to return the results provided by the backend (likely not a full list). ``SearchBackend`` ----------------- Implemented per-backend. * Connects to search engine * Method for saving new docs to index * Method for removing docs from index * Method for performing the actual query ``SearchSite`` -------------- One main implementation. * Standard API that loosely follows ``django.contrib.admin.sites.AdminSite`` * Handles registering/unregistering models to search on a per-site basis. * Provides a means of adding custom indexes to a model, like ``ModelAdmins``. 
``SearchIndex`` --------------- Implemented per-model you wish to index. * Handles generating the document to be indexed. * Populates additional fields to accompany the document. * Provides a way to limit what types of objects get indexed. * Provides a way to index the document(s). * Provides a way to remove the document(s). django-haystack-2.8.0/docs/autocomplete.rst000066400000000000000000000165141325051407000207570ustar00rootroot00000000000000.. _ref-autocomplete: ============ Autocomplete ============ Autocomplete is becoming increasingly common as an add-on to search. Haystack makes it relatively simple to implement. There are two steps in the process, one to prepare the data and one to implement the actual search. Step 1. Setup The Data ====================== To do autocomplete effectively, the search backend uses n-grams (essentially a small window passed over the string). Because this alters the way your data needs to be stored, the best approach is to add a new field to your ``SearchIndex`` that contains the text you want to autocomplete on. You have two choices: ``NgramField`` and ``EdgeNgramField``. Though very similar, the choice of field is somewhat important. * If you're working with standard text, ``EdgeNgramField`` tokenizes on whitespace. This prevents incorrect matches when part of two different words are mashed together as one n-gram. **This is what most users should use.** * If you're working with Asian languages or want to be able to autocomplete across word boundaries, ``NgramField`` should be what you use. Example (continuing from the tutorial):: import datetime from haystack import indexes from myapp.models import Note class NoteIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') pub_date = indexes.DateTimeField(model_attr='pub_date') # We add this for autocomplete. 
content_auto = indexes.EdgeNgramField(model_attr='content') def get_model(self): return Note def index_queryset(self, using=None): """Used when the entire index for model is updated.""" return Note.objects.filter(pub_date__lte=datetime.datetime.now()) As with all schema changes, you'll need to rebuild/update your index after making this change. Step 2. Performing The Query ============================ Haystack ships with a convenience method to perform most autocomplete searches. You simply provide a field and the query you wish to search on to the ``SearchQuerySet.autocomplete`` method. Given the previous example, an example search would look like:: from haystack.query import SearchQuerySet SearchQuerySet().autocomplete(content_auto='old') # Results match things like 'goldfish', 'cuckold' and 'older'. The results from the ``SearchQuerySet.autocomplete`` method are full search results, just like any regular filter. If you need more control over your results, you can use standard ``SearchQuerySet.filter`` calls. For instance:: from haystack.query import SearchQuerySet sqs = SearchQuerySet().filter(content_auto=request.GET.get('q', '')) This can also be extended to use ``SQ`` for more complex queries (and is what's being done under the hood in the ``SearchQuerySet.autocomplete`` method). Example Implementation ====================== The above is the low-level backend portion of how you implement autocomplete. To make it work in the browser, you need both a view to run the autocomplete and some Javascript to fetch the results. Since it comes up often, here is an example implementation of those things. .. warning:: This code comes with no warranty. Don't ask for support on it. If you copy-paste it and it burns down your server room, I'm not liable for any of it. It worked this one time on my machine in a simulated environment. And yeah, semicolon-less + 2 space + comma-first. Deal with it. 
A stripped-down view might look like:: # views.py import simplejson as json from django.http import HttpResponse from haystack.query import SearchQuerySet def autocomplete(request): sqs = SearchQuerySet().autocomplete(content_auto=request.GET.get('q', ''))[:5] suggestions = [result.title for result in sqs] # Make sure you return a JSON object, not a bare list. # Otherwise, you could be vulnerable to an XSS attack. the_data = json.dumps({ 'results': suggestions }) return HttpResponse(the_data, content_type='application/json') The template might look like:: Autocomplete Example

Autocomplete Example

django-haystack-2.8.0/docs/backend_support.rst000066400000000000000000000103071325051407000214330ustar00rootroot00000000000000.. _ref-backend-support: =============== Backend Support =============== Supported Backends ================== * Solr_ * ElasticSearch_ * Whoosh_ * Xapian_ .. _Solr: http://lucene.apache.org/solr/ .. _ElasticSearch: http://elasticsearch.org/ .. _Whoosh: https://bitbucket.org/mchaput/whoosh/ .. _Xapian: http://xapian.org/ Backend Capabilities ==================== Solr ---- **Complete & included with Haystack.** * Full SearchQuerySet support * Automatic query building * "More Like This" functionality * Term Boosting * Faceting * Stored (non-indexed) fields * Highlighting * Spatial search * Requires: pysolr (2.0.13+) & Solr 3.5+ ElasticSearch ------------- **Complete & included with Haystack.** * Full SearchQuerySet support * Automatic query building * "More Like This" functionality * Term Boosting * Faceting (up to 100 facets) * Stored (non-indexed) fields * Highlighting * Spatial search * Requires: `elasticsearch-py `_ 1.x or 2.x. ElasticSearch 5.X is currently unsupported: see `#1383 `_. 
Whoosh ------ **Complete & included with Haystack.** * Full SearchQuerySet support * Automatic query building * "More Like This" functionality * Term Boosting * Stored (non-indexed) fields * Highlighting * Requires: whoosh (2.0.0+) Xapian ------ **Complete & available as a third-party download.** * Full SearchQuerySet support * Automatic query building * "More Like This" functionality * Term Boosting * Faceting * Stored (non-indexed) fields * Highlighting * Requires: Xapian 1.0.5+ & python-xapian 1.0.5+ * Backend can be downloaded here: `xapian-haystack `__ Backend Support Matrix ====================== +----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+ | Backend | SearchQuerySet Support | Auto Query Building | More Like This | Term Boost | Faceting | Stored Fields | Highlighting | Spatial | +================+========================+=====================+================+============+==========+===============+==============+=========+ | Solr | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+ | ElasticSearch | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | +----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+ | Whoosh | Yes | Yes | Yes | Yes | No | Yes | Yes | No | +----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+ | Xapian | Yes | Yes | Yes | Yes | Yes | Yes | Yes (plugin) | No | +----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+ Unsupported Backends & Alternatives =================================== If you have a search engine which you 
would like to see supported in Haystack, the current recommendation is to develop a plugin following the lead of `xapian-haystack `_ so that project can be developed and tested independently of the core Haystack release schedule. Sphinx ------ This backend has been requested multiple times over the years but does not yet have a volunteer maintainer. If you would like to work on it, please contact the Haystack maintainers so your project can be linked here and, if desired, added to the `django-haystack `_ organization on GitHub. In the meantime, Sphinx users should consider Jorge C. Leitão's `django-sphinxql `_ project. django-haystack-2.8.0/docs/best_practices.rst000066400000000000000000000261401325051407000212440ustar00rootroot00000000000000.. _ref-best-practices: ============== Best Practices ============== What follows are some general recommendations on how to improve your search. Some tips represent performance benefits, some provide a better search index. You should evaluate these options for yourself and pick the ones that will work best for you. Not all situations are created equal and many of these options could be considered mandatory in some cases and unnecessary premature optimizations in others. Your mileage may vary. Good Search Needs Good Content ============================== Most search engines work best when they're given corpuses with predominantly text (as opposed to other data like dates, numbers, etc.) in decent quantities (more than a couple words). This is in stark contrast to the databases most people are used to, which rely heavily on non-text data to create relationships and for ease of querying. To this end, if search is important to you, you should take the time to carefully craft your ``SearchIndex`` subclasses to give the search engine the best information you can. This isn't necessarily hard but is worth the investment of time and thought. 
Assuming you've only ever used the ``BasicSearchIndex``, in creating custom ``SearchIndex`` classes, there are some easy improvements to make that will make your search better: * For your ``document=True`` field, use a well-constructed template. * Add fields for data you might want to be able to filter by. * If the model has related data, you can squash good content from those related models into the parent model's ``SearchIndex``. * Similarly, if you have heavily de-normalized models, it may be best represented by a single indexed model rather than many indexed models. Well-Constructed Templates -------------------------- A relatively unique concept in Haystack is the use of templates associated with ``SearchIndex`` fields. These are data templates, which will never be seen by users and ideally contain no HTML. They are used to collect various data from the model and structure it as a document for the search engine to analyze and index. .. note:: If you read nothing else, this is the single most important thing you can do to make search on your site better for your users. Good templates can make or break your search and providing the search engine with good content to index is critical. Good templates structure the data well and incorporate as much pertinent text as possible. This may include additional fields such as titles, author information, metadata, tags/categories. Without being artificial, you want to construct as much context as you can. This doesn't mean you should necessarily include every field, but you should include fields that provide good content or include terms you think your users may frequently search on. Unless you have very unique numbers or dates, neither of these types of data is a good fit within templates. They are usually better suited to other fields for filtering within a ``SearchQuerySet``. 
Additional Fields For Filtering ------------------------------- Documents by themselves are good for generating indexes of content but are generally poor for filtering content, for instance, by date. All search engines supported by Haystack provide a means to associate extra data as attributes/fields on a record. The database analogy would be adding extra columns to the table for filtering. Good candidates here are date fields, number fields, de-normalized data from related objects, etc. You can expose these things to users in the form of a calendar range to specify, an author to look up or only data from a certain series of numbers to return. You will need to plan ahead and anticipate what you might need to filter on, though with each field you add, you increase storage space usage. It's generally **NOT** recommended to include every field from a model, just ones you are likely to use. Related Data ------------ Related data is somewhat problematic to deal with, as most search engines are better with documents than they are with relationships. One way to approach this is to de-normalize a related child object or objects into the parent's document template. The inclusion of a foreign key's relevant data or a simple Django ``{% for %}`` templatetag to iterate over the related objects can increase the salient data in your document. Be careful what you include and how you structure it, as this can have consequences on how well a result might rank in your search. Avoid Hitting The Database ========================== A very easy but effective thing you can do to drastically reduce hits on the database is to pre-render your search results using stored fields then disabling the ``load_all`` aspect of your ``SearchView``. .. warning:: This technique may cause a substantial increase in the size of your index as you are basically using it as a storage mechanism. To do this, you setup one or more stored fields (`indexed=False`) on your ``SearchIndex`` classes. 
You should specify a template for the field, filling it with the data you'd want to display on your search results pages. When the model attached to the ``SearchIndex`` is placed in the index, this template will get rendered and stored in the index alongside the record. .. note:: The downside of this method is that the HTML for the result will be locked in once it is indexed. To make changes to the structure, you'd have to reindex all of your content. It also limits you to a single display of the content (though you could use multiple fields if that suits your needs). The second aspect is customizing your ``SearchView`` and its templates. First, pass the ``load_all=False`` to your ``SearchView``, ideally in your URLconf. This prevents the ``SearchQuerySet`` from loading all model objects for results ahead of time. Then, in your template, simply display the stored content from your ``SearchIndex`` as the HTML result. .. warning:: To do this, you must absolutely avoid using ``{{ result.object }}`` or any further accesses beyond that. That call will hit the database, not only nullifying your work on lessening database hits, but actually making it worse as there will now be at least one query for each result, up from a single query for each type of model with ``load_all=True``. Content-Type Specific Templates =============================== Frequently, when displaying results, you'll want to customize the HTML output based on what model the result represents. In practice, the best way to handle this is through the use of ``include`` along with the data on the ``SearchResult``. Your existing loop might look something like:: {% for result in page.object_list %}

{{ result.object.title }}

{% empty %}

No results found.

{% endfor %} An improved version might look like:: {% for result in page.object_list %} {% if result.content_type == "blog.post" %} {% include "search/includes/blog/post.html" %} {% endif %} {% if result.content_type == "media.photo" %} {% include "search/includes/media/photo.html" %} {% endif %} {% empty %}

No results found.

{% endfor %} Those include files might look like:: # search/includes/blog/post.html

{{ result.object.title }}

{{ result.object.tease }}

# search/includes/media/photo.html

Taken By {{ result.object.taken_by }}

You can make this even better by standardizing on an includes layout, then writing a template tag or filter that generates the include filename. Usage might look something like:: {% for result in page.object_list %} {% with result|search_include as fragment %} {% include fragment %} {% endwith %} {% empty %}

No results found.

{% endfor %} Real-Time Search ================ If your site sees heavy search traffic and up-to-date information is very important, Haystack provides a way to constantly keep your index up to date. You can enable the ``RealtimeSignalProcessor`` within your settings, which will allow Haystack to automatically update the index whenever a model is saved/deleted. You can find more information within the :doc:`signal_processors` documentation. Use Of A Queue For A Better User Experience =========================================== By default, you have to manually reindex content, Haystack immediately tries to merge it into the search index. If you have a write-heavy site, this could mean your search engine may spend most of its time churning on constant merges. If you can afford a small delay between when a model is saved and when it appears in the search results, queuing these merges is a good idea. You gain a snappier interface for users as updates go into a queue (a fast operation) and then typical processing continues. You also get a lower churn rate, as most search engines deal with batches of updates better than many single updates. You can also use this to distribute load, as the queue consumer could live on a completely separate server from your webservers, allowing you to tune more efficiently. Implementing this is relatively simple. There are two parts, creating a new ``QueuedSignalProcessor`` class and creating a queue processing script to handle the actual updates. For the ``QueuedSignalProcessor``, you should inherit from ``haystack.signals.BaseSignalProcessor``, then alter the ``setup/teardown`` methods to call an enqueuing method instead of directly calling ``handle_save/handle_delete``. For example:: from haystack import signals class QueuedSignalProcessor(signals.BaseSignalProcessor): # Override the built-in. 
def setup(self): models.signals.post_save.connect(self.enqueue_save) models.signals.post_delete.connect(self.enqueue_delete) # Override the built-in. def teardown(self): models.signals.post_save.disconnect(self.enqueue_save) models.signals.post_delete.disconnect(self.enqueue_delete) # Add on a queuing method. def enqueue_save(self, sender, instance, **kwargs): # Push the save & information onto queue du jour here ... # Add on a queuing method. def enqueue_delete(self, sender, instance, **kwargs): # Push the delete & information onto queue du jour here ... For the consumer, this is much more specific to the queue used and your desired setup. At a minimum, you will need to periodically consume the queue, fetch the correct index from the ``SearchSite`` for your application, load the model from the message and pass that model to the ``update_object`` or ``remove_object`` methods on the ``SearchIndex``. Proper grouping, batching and intelligent handling are all additional things that could be applied on top to further improve performance. django-haystack-2.8.0/docs/boost.rst000066400000000000000000000072671325051407000174110ustar00rootroot00000000000000.. _ref-boost: ===== Boost ===== Scoring is a critical component of good search. Normal full-text searches automatically score a document based on how well it matches the query provided. However, sometimes you want certain documents to score better than they otherwise would. Boosting is a way to achieve this. There are three types of boost: * Term Boost * Document Boost * Field Boost .. note:: Document & Field boost support was added in Haystack 1.1. Despite all being types of boost, they take place at different times and have slightly different effects on scoring. Term boost happens at query time (when the search query is run) and is based around increasing the score if a certain word/phrase is seen. On the other hand, document & field boosts take place at indexing time (when the document is being added to the index). 
Document boost causes the relevance of the entire result to go up, where field boost causes only searches within that field to do better. .. warning:: Be warned that boost is very, very sensitive & can hurt overall search quality if over-zealously applied. Even very small adjustments can affect relevance in a big way. Term Boost ========== Term boosting is achieved by using ``SearchQuerySet.boost``. You provide it the term you want to boost on & a floating point value (based around ``1.0`` as 100% - no boost). Example:: # Slight increase in relevance for documents that include "banana". sqs = SearchQuerySet().boost('banana', 1.1) # Big decrease in relevance for documents that include "blueberry". sqs = SearchQuerySet().boost('blueberry', 0.8) See the :doc:`searchqueryset_api` docs for more details on using this method. Document Boost ============== Document boosting is done by adding a ``boost`` field to the prepared data ``SearchIndex`` creates. The best way to do this is to override ``SearchIndex.prepare``:: from haystack import indexes from notes.models import Note class NoteSearchIndex(indexes.SearchIndex, indexes.Indexable): # Your regular fields here then... def prepare(self, obj): data = super(NoteSearchIndex, self).prepare(obj) data['boost'] = 1.1 return data Another approach might be to add a new field called ``boost``. However, this can skew your schema and is not encouraged. Field Boost =========== Field boosting is enabled by setting the ``boost`` kwarg on the desired field. An example of this might be increasing the significance of a ``title``:: from haystack import indexes from notes.models import Note class NoteSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) title = indexes.CharField(model_attr='title', boost=1.125) def get_model(self): return Note .. note:: Field boosting only has an effect when the SearchQuerySet filters on the field which has been boosted. 
If you are using a default search view or form you will need override the search method or other include the field in your search query. This example CustomSearchForm searches the automatic ``content`` field and the ``title`` field which has been boosted:: from haystack.forms import SearchForm class CustomSearchForm(SearchForm): def search(self): if not self.is_valid(): return self.no_query_found() if not self.cleaned_data.get('q'): return self.no_query_found() q = self.cleaned_data['q'] sqs = self.searchqueryset.filter(SQ(content=AutoQuery(q)) | SQ(title=AutoQuery(q))) if self.load_all: sqs = sqs.load_all() return sqs.highlight() django-haystack-2.8.0/docs/changelog.rst000066400000000000000000005122731325051407000202100ustar00rootroot00000000000000Changelog ========= v2.6.0 (2017-01-02) ------------------- - Merge #1460: backend support for Elasticsearch 2.x. [Chris Adams] Thanks to João Junior (@joaojunior) and Bruno Marques (@ElSaico) for the patch Closes #1460 Closes #1391 Closes #1336 Closes #1247 - Docs: update Elasticsearch support status. [Chris Adams] - Tests: avoid unrelated failures when elasticsearch is not installed. [Chris Adams] This avoids spurious failures in tests for other search engines when the elasticsearch client library is not installed at all but the ES backend is still declared in the settings. - Tests: friendlier log message for ES version checks. [Chris Adams] This avoids a potentially scary-looking ImportError flying by in the test output for what's expected in normal usage. - Tests: update ES version detection in settings. [Chris Adams] This allows the tests to work when run locally or otherwise outside of our Travis / Tox scripts by obtaining the version from the installed `elasticsearch` client library. - Tests: update ES1 client version check message. 
[Chris Adams] The name of the Python module changed over time and this now matches the ES2 codebase behaviour of having the error message give you the exact package to install including the version. - Update travis script with ES documentation. [Chris Adams] Add a comment for anyone wondering why this isn't a simple `add-apt-repository` call - Fixed More Like This test with deferred query on Elasticsearch 2.x. [Bruno Marques] - Fixed expected query behaviour on ES2.x test. [Bruno Marques] - Install elasticsearch2.0 via apt. [joaojunior] - Install elasticsearch2.0 via apt. [joaojunior] - Remove typo. [joaojunior] - Remove services elasticsearch. [joaojunior] - Fix typo. [joaojunior] - Sudo=true in .travis.yml to install elasticsearch from apt-get. [joaojunior] - Fix .travis. [joaojunior] - Add logging in __init__ tests elasticsearch. [joaojunior] - Get changes from Master to resolve conflicts. [joaojunior] - Install elasticsearch1.7 via apt. [joaojunior] - Update Files to run tests in Elasticsearch2.x. [joaojunior] - Refactoring the code in pull request #1336 . This pull request is to permit use ElasticSearch 2.X. [joaojunior] - Improved custom object identifier test. [Chris Adams] This provides an example for implementors and ensures that failing to use the custom class would cause a test failure. - Update management backend documentation for `--using` [flinkflonk] Thanks to @flinkflonk for the patch! Closes #1215 - Fix filtered "more like this" queries (#1459) [David Cook] Now the Solr backend correctly handles a `more_like_this()` query which is subsequently `filter()`-ed. Thanks to @divergentdave for the patch and tests! - ReStructuredText link format fixes. (#1458) [John Heasly] - Add note to Backend Support docs about lack of ES 5.X support. (#1457) [John Heasly] - Replace deprecated Point.get_coords() calls. [Chris Adams] This works as far back as Django 1.8, which is the earliest which we support. 
See #1454 - Use setuptools_scm to manage package version numbers. [Chris Adams] v2.5.1 (2016-10-28) ------------------- New ~~~ - Support for Django 1.10. [Chris Adams] Thanks to Morgan Aubert (@ellmetha) for the patch Closes #1434 Closes #1437 Closes #1445 Fix ~~~ - Contains filter, add endswith filter. [Antony] * `__contains` now works in a more intuitive manner (the previous behaviour remains the default for `=` shortcut queries and can be requested explicitly with `__content`) * `__endswith` is now supported as the logical counterpart to `__startswith` Thanks to @antonyr for the patch and @sebslomski for code review and testing. Other ~~~~~ - V2.5.1. [Chris Adams] - Add support for Django 1.10 (refs: #1437, #1434) [Morgan Aubert] - Docs: fix Sphinx hierarchy issue. [Chris Adams] - Fix multiprocessing regression in update_index. [Chris Adams] 4e1e2e1c5df1ed1c5432b9d26fcb9dc1abab71f4 introduced a bug because it used a property name which exists on haystack.ConnectionHandler but not the Django ConnectionHandler class it's modeled on. Long-term, we should rename the Haystack class to something like `SearchConnectionHandler` to avoid future confusion. Closes #1449 - Doc: cleanup searchindex_api.rst. [Jack Norman] Thanks to Jack Norman (@jwnorman) for the patch - Merge pull request #1444 from jeremycline/master. [Chris Adams] Upgrade setuptools in Travis so urllib3-1.18 installs - Upgrade setuptools in Travis so urllib3-1.18 installs. [Jeremy Cline] The version of setuptools in Travis is too old to handle <= as an environment marker. - Tests: accept Solr/ES config from environment. [Chris Adams] This makes it easy to override these values for e.g. running test instances using Docker images with something like this: ``` TEST_ELASTICSEARCH_1_URL="http://$(docker port elasticsearch-1.7 9200/tcp)/" TEST_SOLR_URL="http://$(docker port solr-6 8983/tcp)/solr/" test_haystack/run_tests.py ``` See #1408 - Merge pull request #1418 from Alkalit/master. 
[Steve Byerly] Added link for 2.5.x version docs - Added link for 2.5.x version. [Alexey Kalinin] - Merge pull request #1432 from farooqaaa/master. [Steve Byerly] Added missing `--batch-size` argument for `rebuild_index` management command. - Added missing --batch-size argument. [Farooq Azam] - Merge pull request #1036 from merwok/patch-1. [Steve Byerly] Documentation update - Use ellipsis instead of pass. [Éric Araujo] - Fix code to enable highlighting. [Éric Araujo] - Merge pull request #1392 from browniebroke/bugfix/doc-error. [Steve Byerly] Fix Sphinx errors in the changelog - Fix Sphinx errors in the changelog. [Bruno Alla] - Merge pull request #1341 from tymofij/solr-hl-options. [Steve Byerly] - Merge master > tymofij/solr-hl-options. [Steve Byerly] - Make solr backend accept both shortened and full-form highlighting options. [Tim Babych] - Autoprefix 'hl.' for solr options. [Tim Babych] - Update gitignore to not track test artifacts. [Steve Byerly] - Merge pull request #1413 from tymofij/patch-2. [Steve Byerly] typo: suite -> suit - Typo: suite -> suit. [Tim Babych] - Merge pull request #1412 from SteveByerly/highlight_sqs_docs. [Steve Byerly] improve sqs highlight docs - illustrate custom parameters - Improve highlight docs for custom options. [Steve Byerly] v2.5.0 (2016-07-11) ------------------- New ~~~ - SearchQuerySet.set_spelling_query for custom spellcheck. [Chris Adams] This makes it much easier to customize the text sent to the backend search engine for spelling suggestions independently from the actual query being executed. - Support ManyToManyFields in model_attr lookups. [Arjen Verstoep] Thanks to @Terr for the patch - `update_index` will retry after backend failures. [Gilad Beeri] Now `update_index` will retry failures multiple times before aborting with a progressive time delay. Thanks to Gilad Beeri (@giladbeeri) for the patch - `highlight()` accepts custom values on Solr and ES. 
[Chris Adams] This allows the default values to be overriden and arbitrary backend-specific parameters may be provided to Solr or ElasticSearch. Thanks to @tymofij for the patch Closes #1334 - Allow Routers to return multiple indexes. [Chris Adams] Thanks to Hugo Chargois (@hchargois) for the patch Closes #1337 Closes #934 - Support for newer versions of Whoosh. [Chris Adams] - Split SearchView.create_response into get_context. [Chris Adams] This makes it easy to override the default `create_response` behaviour if you don't want a standard HTML response. Thanks @seocam for the patch Closes #1338 - Django 1.9 support thanks to Claude Paroz. [Chris Adams] - Create a changelog using gitchangelog. [Chris Adams] This uses `gitchangelog `_ to generate docs/changelog.rst from our Git commit history using the tags for each version. The configuration is currently tracking upstream exactly except for our version tags being prefixed with "v". Changes ~~~~~~~ - Support for Solr 5+ spelling suggestion format. [Chris Adams] - Set install requirements for Django versions. [Chris Adams] This will prevent accidentally breaking apps when Django 1.10 is released. Closes #1375 - Avoid double-query for queries matching no results. [Chris Adams] - Update supported/tested Django versions. [Chris Adams] * setup.py install_requires uses `>=1.8` to match our current test matrix * Travis allows failures for Django 1.10 so we can start tracking the upcoming release - Make backend subclassing easier. [Chris Adams] This change allows the backend build_search_kwargs to accept arbitrary extra arguments, making life easier for authors of `SearchQuery` or `SearchBackend` subclasses when they can directly pass a value which is directly supported by the backend search client. - Update_index logging & multiprocessing improvements. 
[Chris Adams] * Since older versions of Python are no longer supported we no longer conditionally import multiprocessing (see #1001) * Use multiprocessing.log_to_stderr for all messages * Remove previously-disabled use of the multiprocessing workers for index removals, allowing the worker code to be simplified - Moved signal processor loading to app_config.ready. [Chris Adams] Thanks to @claudep for the patch Closes #1260 - Handle `__in=[]` gracefully on Solr. [Chris Adams] This commit avoids the need to check whether a list is empty to avoid an error when using it for an `__in` filter. Closes #358 Closes #1311 Fix ~~~ - Attribute resolution on models which have a property named `all` (#1405) [Henrique Chehad] Thanks to Henrique Chehad (@henriquechehad) for the patch Closes #1404 - Tests will fall back to the Apache archive server. [Chris Adams] The Apache 4.10.4 release was quietly removed from the mirrors without a redirect. Until we have time to add newer Solr releases to the test suite we'll download from the archive and let the Travis build cache store it. - Whoosh backend support for RAM_STORE (closes #1386) [Martin Owens] Thanks to @doctormo for the patch - Unsafe update_worker multiprocessing sessions. [Chris Adams] The `update_index` management command does not handle the `multiprocessing` environment safely. On POSIX systems, `multiprocessing` uses `fork()` which means that when called in a context such as the test suite where the connection has already been used some backends like pysolr or ElasticSearch may have an option socket connected to the search server and that leaves a potential race condition where HTTP requests are interleaved, producing unexpected errors. This commit resets the backend connection inside the workers and has been stable across hundreds of runs, unlike the current situation where a single-digit number of runs would almost certainly have at least one failure. 
Other improvements: * Improved sanity checks for indexed documents in management command test suite. This wasn’t actually the cause of the problem above but since I wrote it while tracking down the real problem there’s no reason not to use it. * update_index now checks that each block dispatched was executed to catch any possible silent failures. Closes #1376 See #1001 - Tests support PyPy. [Chris Adams] PyPy has an optimization which causes it to call __len__ when running a list comprehension, which is the same thing Python does for `list(iterable)`. This commit simply changes the test code to always use `list` the PyPy behaviour matches CPython. - Avoid an extra query on empty spelling suggestions. [Chris Adams] None was being used as a placeholder to test whether to run a spelling suggestion query but was also a possible response when the backend didn’t return a suggestion, which meant that calling `spelling_suggestion()` could run a duplicate query. - MultiValueField issues with single value (#1364) [Arjen Verstoep] Thanks to @terr for the patch! - Queryset slicing and reduced code duplication. [Craig de Stigter] Now pagination will not lazy-load all earlier pages before returning the result. Thanks to @craigds for the patch Closes #1269 Closes #960 - Handle negative timestamps returned from ES. [Chris Adams] Elastic search can return negative timestamps for histograms if the dates are pre-1970. This PR properly handles these pre-1970 dates. Thanks to @speedplane for the patch Closes #1239 - SearchMixin allows form initial values. [Chris Adams] Thanks to @ahoho for the patch Closes #1319 - Graceful handling of empty __in= lists on ElasticSearch. [Chris Adams] Thanks to @boulderdave for the ES version of #1311 Closes #1335 Other ~~~~~ - Docs: update unsupported backends notes. [Chris Adams] * Officially suggest developing backends as separate projects * Recommend Sphinx users consider django-sphinxql - V2.5.0. [Chris Adams] - Bump version to 2.5.dev2. 
[Chris Adams] - AUTHORS. [Tim Babych] - Expand my username into name in changelog.txt. [Tim Babych] - Corrected non-ascii characters in comments. (#1390) [Mark Walker] - Add lower and upper bounds for django versions. [Simon Hanna] - Convert readthedocs link for their .org -> .io migration for hosted projects. [Adam Chainz] As per [their blog post of the 27th April](https://blog.readthedocs.com/securing-subdomains/) ‘Securing subdomains’: > Starting today, Read the Docs will start hosting projects from subdomains on the domain readthedocs.io, instead of on readthedocs.org. This change addresses some security concerns around site cookies while hosting user generated data on the same domain as our dashboard. Test Plan: Manually visited all the links I’ve modified. - V2.5.dev1. [Chris Adams] - Merge pull request #1349 from sbussetti/master. [Chris Adams] Fix logging call in `update_index` - Fixes improper call to logger in mgmt command. [sbussetti] - Merge pull request #1340 from claudep/manage_commands. [Chris Adams] chg: migrate management commands to argparse - Updated management commands from optparse to argparse. [Claude Paroz] This follows Django's same move and prevents deprecation warnings. Thanks Mario César for the initial patch. - Merge pull request #1225 from gregplaysguitar/patch-1. [Chris Adams] fix: correct docstring for ModelSearchForm.get_models !minor - Fix bogus docstring. [Greg Brown] - Merge pull request #1328 from claudep/travis19. [Chris Adams] Updated test configs to include Django 1.9 - Updated test configs to include Django 1.9. [Claude Paroz] - Merge pull request #1313 from chrisbrooke/Fix-elasticsearch-2.0-meta- data-changes. [Chris Adams] - Remove boost which is now unsupported. [Chris Brooke] - Fix concurrency issues when building UnifiedIndex. [Chris Adams] We were getting this error a lot when under load in a multithreaded wsgi environment: Model '%s' has more than one 'SearchIndex`` handling it. 
Turns out the connections in haystack.connections and the UnifiedIndex instance were stored globally. However there is a race condition in UnifiedIndex.build() when multiple threads both build() at once, resulting in the above error. Best fix is to never share the same engine or UnifiedIndex across multiple threads. This commit does that. Closes #959 Closes #615 - Load connection routers lazily. [Chris Adams] Thanks to Tadas Dailyda (@skirsdeda) for the patch Closes #1034 Closes #1296 - DateField/DateTimeField accept strings values. [Chris Adams] Now the convert method will be called by default when string values are received instead of the normal date/datetime values. Closes #1188 - Fix doc ReST warning. [Chris Adams] - Merge pull request #1297 from martinsvoboda/patch-1. [Sam Peka] Highlight elasticsearch 2.X is not supported yet - Highlight in docs that elasticsearch 2.x is not supported yet. [Martin Svoboda] - Start updating compatibility notes. [Chris Adams] * Deprecate versions of Django which are no longer supported by the Django project team * Update ElasticSearch compatibility messages * Update Travis / Tox support matrix - Merge pull request #1287 from ses4j/patch-1. [Sam Peka] Remove duplicated SITE_ID from test_haystack/settings.py - Remove redundant SITE_ID which was duplicated twice. [Scott Stafford] - Add ``fuzzy`` operator to SearchQuerySet. [Chris Adams] This exposes the backends’ native fuzzy query support. Thanks to Ana Carolina (@anacarolinats) and Steve Bussetti (@sbussetti) for the patch. - Merge pull request #1281 from itbabu/python35. [Justin Caratzas] Add python 3.5 to tests - Add python 3.5 to tests. [Marco Badan] ref: https://docs.djangoproject.com/en/1.9/faq/install/#what-python-version-can-i-use-with-django - SearchQuerySet: don’t trigger backend access in __repr__ [Chris Adams] This can lead to confusing errors or performance issues by triggering backend access at unexpected locations such as logging. 
Closes #1278 - Merge pull request #1276 from mariocesar/patch-1. [Chris Adams] Use compatible get_model util to support new django versions Thanks to @mariocesar for the patch! - Reuse haystack custom get model method. [Mario César Señoranis Ayala] - Removed unused import. [Mario César Señoranis Ayala] - Use compatible get_model util to support new django versions. [Mario César Señoranis Ayala] - Merge pull request #1263 from dkarchmer/patch-1. [Chris Adams] Update views_and_forms.rst - Update views_and_forms.rst. [David Karchmer] After breaking my head for an hour, I realized the instructions to upgrade to class based views are incorrect. It should indicate the switch from `page` to `page_obj` and not `page_object` v2.3.2 (2015-11-11) ------------------- - V2.3.2 maintenance update. [Chris Adams] - Fix #1253. [choco] - V2.3.2 pre-release version bump. [Chris Adams] - Allow individual records to be skipped while indexing. [Chris Adams] Previously there was no easy way to skip specific objects other than filtering the queryset. This change allows a prepare method to raise `SkipDocument` after calling methods or making other checks which cannot easily be expressed as database filters. Thanks to Felipe Prenholato (@chronossc) for the patch Closes #380 Closes #1191 v2.4.1 (2015-10-29) ------------------- - V2.4.1. [Chris Adams] - Minimal changes to the example project to allow test use. [Chris Adams] - Merge remote-tracking branch 'django-haystack/pr/1261' [Chris Adams] The commit in #1252 / #1251 was based on the assumption that the tutorial used the new generic views, which is not yet correct. This closes #1261 by restoring the wording and adding some tests to avoid regressions in the future before the tutorial is overhauled. - Rename 'page_obj' with 'page' in the tutorial, section Search Template as there is no 'page_obj' in the controller and this results in giving 'No results found' in the search. [bboneva] - Style cleanup. 
[Chris Adams] * Remove duplicate & unused imports * PEP-8 indentation & whitespace * Use `foo not in bar` instead of `not foo in bar` - Update backend logging style. [Chris Adams] * Make Whoosh message consistent with the other backends * Pass exception info to loggers in except: blocks * PEP-8 - Avoid unsafe default value on backend clear() methods. [Chris Adams] Having a mutable structure like a list as a default value is unsafe; this commit changes that to the standard None. - Merge pull request #1254 from chocobn69/master. [Chris Adams] Update for API change in elasticsearch 1.8 (closes #1253) Thanks to @chocobn69 for the patch - Fix #1253. [choco] - Tests: update Solr launcher for changed mirror format. [Chris Adams] The Apache mirror-detection script appears to have changed its response format recently. This change handles that and makes future error messages more explanatory. - Bump doc version numbers - closes #1105. [Chris Adams] - Merge pull request #1252 from rhemzo/master. [Chris Adams] Update tutorial.rst (closes #1251) Thanks to @rhemzo for the patch - Update tutorial.rst. [rhemzo] change page for page_obj - Merge pull request #1240 from speedplane/improve-cache-fill. [Chris Adams] Use a faster implementation of query result cache - Use a faster implementation of this horrible cache. In my tests it runs much faster and uses far less memory. [speedplane] - Merge pull request #1149 from lovmat/master. [Chris Adams] FacetedSearchMixin bugfixes and improvements * Updated documentation & example code * Fixed inheritance chain * Added facet_fields Thanks to @lovmat for the patch - Updated documentation, facet_fields attribute. [lovmat] - Added facet_fields attribute. [lovmat] Makes it easy to include facets into FacetedSearchVIew - Bugfixes. [lovmat] - Merge pull request #1232 from dlo/patch-1. [Chris Adams] Rename elasticsearch-py to elasticsearch in docs Thanks to @dlo for the patch - Rename elasticsearch-py to elasticsearch in docs. 
[Dan Loewenherz] - Update wording in SearchIndex get_model exception. [Chris Adams] Thanks to Greg Brown (@gregplaysguitar) for the patch Closes #1223 - Corrected exception wording. [Greg Brown] - Allow failures on Python 2.6. [Chris Adams] Some of our test dependencies like Mock no longer support it. Pinning Mock==1.0.1 on Python 2.6 should avoid that failure but the days of Python 2.6 are clearly numbered. - Travis: stop testing unsupported versions of Django on Python 2.6. [Chris Adams] - Use Travis’ matrix support rather than tox. [Chris Adams] This avoids a layer of build setup and makes the Travis console reports more useful - Tests: update the test version of Solr in use. [Chris Adams] 4.7.2 has disappeared from most of the Apache mirrors v2.4.0 (2015-06-09) ------------------- - Release 2.4.0. [Chris Adams] - Merge pull request #1208 from ShawnMilo/patch-1. [Chris Adams] Fix a typo in the faceting docs - Possible typo fix. [Shawn Milochik] It seems that this was meant to be results. - 2.4.0 release candidate 2. [Chris Adams] - Fix Django 1.9 deprecation warnings. [Ilan Steemers] * replaced get_model with haystack_get_model which returns the right function depending on the Django version * get_haystack_models is now compliant with > Django 1.7 Closes #1206 - Documentation: update minimum versions of Django, Python. [Chris Adams] - V2.4.0 release candidate. [Chris Adams] - Bump version to 2.4.0.dev1. [Chris Adams] - Travis: remove Django 1.8 from allow_failures. [Chris Adams] - Tests: update test object creation for Django 1.8. [Chris Adams] Several of the field tests previously assigned a related test model instance before saving it:: mock_tag = MockTag(name='primary') mock = MockModel() mock.tag = mock_tag Django 1.8 now validates this dodgy practice and throws an error. This commit simply changes it to use `create()` so the mock_tag will have a pk before assignment. - Update AUTHORS. [Chris Adams] - Tests: fix deprecated Manager.get_query_set call. 
[Chris Adams] - Updating haystack to test against django 1.8. [Chris Adams] Updated version of @troygrosfield's patch updating the test-runner for Django 1.8 Closes #1175 - Travis: allow Django 1.8 failures until officially supported. [Chris Adams] See #1175 - Remove support for Django 1.5, add 1.8 to tox/travis. [Chris Adams] The Django project does not support 1.5 any more and it's the source of most of our false-positive test failures - Use db.close_old_connections instead of close_connection. [Chris Adams] Django 1.8 removed the `db.close_connection` method. Thanks to Alfredo Armanini (@phingage) for the patch - Fix mistake in calling super TestCase method. [Ben Spaulding] Oddly this caused no issue on Django <= 1.7, but it causes numerous errors on Django 1.8. - Correct unittest imports from commit e37c1f3. [Ben Spaulding] - Prefer stdlib unittest over Django's unittest2. [Ben Spaulding] There is no need to fallback to importing unittest2 because Django 1.5 is the oldest Django we support, so django.utils.unittest is guaranteed to exist. - Prefer stdlib OrderedDict over Django's SortedDict. [Ben Spaulding] The two are not exactly they same, but they are equivalent for Haystack's needs. - Prefer stdlib importlib over Django's included version. [Ben Spaulding] The app_loading module had to shuffle things a bit. When it was importing the function it raised a [RuntimeError][]. Simply importing the module resolved that. [RuntimeError]: https://gist.github.com/benspaulding/f36eaf483573f8e5f777 - Docs: explain how field boosting interacts with filter. [Chris Adams] Thanks to @amjoconn for contributing a doc update to help newcomers Closes #1043 - Add tests for values/values_list slicing. [Chris Adams] This confirms that #1019 is fixed - Update_index: avoid gaps in removal logic. [Chris Adams] The original logic did not account for the way removing records interfered with the pagination logic. Closes #1194 - Update_index: don't use workers to remove stale records. 
[Chris Adams] There was only minimal gain to this because, unlike indexing, removal is a simple bulk operation limited by the search engine. See #1194 See #1201 - Remove lxml dependency. [Chris Adams] pysolr 3.3.2+ no longer requires lxml, which saves a significant install dependency - Allow individual records to be skipped while indexing. [Chris Adams] Previously there was no easy way to skip specific objects other than filtering the queryset. This change allows a prepare method to raise `SkipDocument` after calling methods or making other checks which cannot easily be expressed as database filters. Thanks to Felipe Prenholato (@chronossc) for the patch Closes #380 Closes #1191 - Update_index: avoid "MySQL has gone away error" with workers. [Eric Bressler (Platform)] This fixes an issue with a stale database connection being passed to a multiprocessing worker when using `--remove` Thanks to @ebressler for the patch Closes #1201 - Depend on pysolr 3.3.1. [Chris Adams] - Start-solr-test-server: avoid Travis dependency. [Chris Adams] This will now fall back to the current directory when run outside of our Travis-CI environment - Fix update_index --remove handling. [Chris Adams] * Fix support for custom keys by reusing the stored value rather than regenerating following the default pattern * Batch remove operations using the total number of records in the search index rather than the database Closes #1185 Closes #1186 Closes #1187 - Merge pull request #1177 from paulshannon/patch-1. [Chris Adams] Update TravisCI link in README - Update TravisCI link. [Paul Shannon] I think the repo got changed at some point and the old project referenced at travisci doesn't exist anymore... - Travis: enable containers. [Chris Adams] * Move apt-get installs to the addons/apt_packages: http://docs.travis-ci.com/user/apt-packages/ * Set `sudo: false` to enable containers: http://docs.travis-ci.com/user/workers/container-based-infrastructure/ - Docs: correct stray GeoDjango doc link. 
[Chris Adams] - Document: remove obsolete Whoosh Python 3 warning. [Chris Adams] Thanks to @gitaarik for the pull request Closes #1154 Fixes #1108 - Remove method_decorator backport (closes #1155) [Chris Adams] This was no longer used anywhere in the Haystack source or documentation - Travis: enable APT caching. [Chris Adams] - Travis: update download caching. [Chris Adams] - App_loading cleanup. [Chris Adams] * Add support for Django 1.7+ AppConfig * Rename internal app_loading functions to have haystack_ prefix to make it immediately obvious that they are not Django utilities and start * Add tests to avoid regressions for apps nested with multiple levels of module hierarchy like `raven.contrib.django.raven_compat` * Refactor app_loading logic to make it easier to remove the legacy compatibility code when we eventually drop support for older versions of Django Fixes #1125 Fixes #1150 Fixes #1152 Closes #1153 - Switch defaults closer to Python 3 defaults. [Chris Adams] * Add __future__ imports: isort --add_import 'from __future__ import absolute_import, division, print_function, unicode_literals' * Add source encoding declaration header - Setup.py: use strict PEP-440 dev version. [Chris Adams] The previous version was valid as per PEP-440 but triggers a warning in pkg_resources - Merge pull request #1146 from kamilmowinski/patch-1. [Chris Adams] Fix typo in SearchResult documentation - Update searchresult_api.rst. [kamilmowinski] - Merge pull request #1143 from wicol/master. [Chris Adams] Fix deprecation warnings in Django 1.6.X (thanks @wicol) - Fix deprecation warnings in Django 1.6.X. [Wictor] Options.model_name was introduced in Django 1.6 together with a deprecation warning: https://github.com/django/django/commit/ec469ade2b04b94bfeb59fb0fc7d9300470be615 - Travis: move tox setup to before_script. 
[Chris Adams] This should cause dependency installation problems to show up as build errors rather than outright failures - Update ElasticSearch defaults to allow autocompleting numbers. [Chris Adams] Previously the defaults for ElasticSearch used the `lowercase` tokenizer, which prevented numbers from being autocompleted. Thanks to Phill Tornroth (@phill-tornroth) for contributing a patch which changes the default settings to use the `standard` tokenizer with the `lowercase` filter Closes #1056 - Update documentation for new class-based views. [Chris Adams] Thanks to @troygrosfield for the pull-request Closes #1139 Closes #1133 See #1130 - Added documentation for configuring facet behaviour. [Chris Adams] Thanks to Philippe Luickx for the contribution Closes #1111 - UnifiedIndex has a stable interface to get all indexes. [Chris Adams] Previously it was possible for UnifiedIndexes.indexes to be empty when called before the list had been populated. This change deprecates accessing `.indexes` directly in favor of a `get_indexes()` accessor which will call `self.build()` first if necessary. Thanks to Phill Tornroth for the patch and tests. Closes #851 - Add support for SQ in SearchQuerySet.narrow() (closes #980) [Chris Adams] Thanks to Andrei Fokau (@andreif) for the patch and tests - Disable multiprocessing on Python 2.6 (see #1001) [Chris Adams] multiprocessing.Pool.join() hangs reliably on Python 2.6 but not any later version tested. Since this is an optional feature we’ll simply disable it - Bump version number to 2.4.0-dev. [Chris Adams] - Update_index: wait for all pool workers to finish. [Chris Adams] There was a race condition where update_index() would return before all of the workers had finished updating Solr. This manifested itself most frequently as Travis failures for the multiprocessing test (see #1001). 
- Tests: Fix ElasticSearch index setup (see #1093) [Chris Adams] Previously when clear_elasticsearch_index() was called to reset the tests, this could produce confusing results because it cleared the mappings without resetting the backend’s setup_complete status and thus fields which were expected to have a specific type would end up being inferred With this changed test_regression_proper_start_offsets and test_more_like_this no longer fail - Update rebuild_index --nocommit handling and add tests. [Chris Adams] rebuild_index builds its option list by combining the options from clear_index and update_index. This previously had a manual exclude list for options which were present in both commands to avoid conflicts but the nocommit option wasn't in that list. This wasn't tested because our test suite uses call_command rather than invoking the option parser directly. This commit also adds tests to confirm that --nocommit will actually pass commit=False to clear_index and update_index. Closes #1140 See #1090 - Support ElasticSearch 1.x distance filter syntax (closes #1003) [Chris Adams] The elasticsearch 1.0 release was backwards incompatible with our previous usage. Thanks to @dulaccc for the patch adding support. - Docs: add Github style guide link to pull request instructions. [Chris Adams] The recent Github blog post makes a number of good points: https://github.com/blog/1943-how-to-write-the-perfect-pull-request - Fixed exception message when resolving model_attr. [Wictor] This fixes the error message displayed when model_attr references an unknown attribute. Thanks to @wicol for the patch Closes #1094 - Compatibility with Django 1.7 app loader (see #1097) [Chris Adams] * Added wrapper around get_model, so that Django 1.7 uses the new app loading mechanism. * Added extra model check to prevent that a simple module is treated as model. Thanks to Dirk Eschler (@deschler) for the patch. 
- Fix index_fieldname to match documentation (closes #825) [Chris Adams] @jarig contributed a fix to ensure that index_fieldname renaming does not interfere with using the field name declared on the index. - Add tests for Solr/ES spatial order_by. [Chris Adams] This exists primarily to avoid the possibility of breaking compatibility with the inconsistent lat, lon ordering used by Django, Solr and ElasticSearch. - Remove undocumented `order_by_distance` [Chris Adams] This path was an undocumented artifact of the original geospatial feature-branch back in the 1.X era. It wasn’t documented and is completely covered by the documented API. - ElasticSearch tests: PEP-8 cleanup. [Chris Adams] - Implement managers tests for spatial features. [Chris Adams] This is largely shadowed by the actual spatial tests but it avoids surprises on the query generation * Minor PEP-8 - Remove unreferenced add_spatial methods. [Chris Adams] SolrSearchQuery and ElasticsearchSearchQuery both defined an `add_spatial` method which was neither called nor documented. - Remove legacy httplib/httplib2 references. [Chris Adams] We’ve actually delegated the actual work to requests but the docs & tests had stale references - Tests: remove legacy spatial backend code. [Chris Adams] This has never run since the solr_native_distance backend did not exist and thus the check always failed silently - ElasticSearch backend: minor PEP-8 cleanup. [Chris Adams] - Get-solr-download-url: fix Python 3 import path. [Chris Adams] This allows the scripts to run on systems where Python 3 is the default version - Merge pull request #1130 from troygrosfield/master. [Chris Adams] Added generic class based search views (thanks @troygrosfield) - Removed "expectedFailure". [Troy Grosfield] - Minor update. [Troy Grosfield] - Added tests for the generic search view. [Troy Grosfield] - Hopefully last fix for django version checking. [Troy Grosfield] - Fix for django version check. 
[Troy Grosfield] - Adding fix for previously test for django 1.7. [Troy Grosfield] - Adding py34-django1.7 to travis. [Troy Grosfield] - Test for the elasticsearch client. [Troy Grosfield] - Added unicode_literals import for py 2/3 compat. [Troy Grosfield] - Added generic class based search views. [Troy Grosfield] - Merge pull request #1101 from iElectric/nothandledclass. [Chris Adams] Report correct class when raising NotHandled - Report correct class when raising NotHandled. [Domen Kožar] - Merge pull request #1090 from andrewschoen/feature/no-commit-flag. [Chris Adams] Adds a --nocommit arg to the update_index, clear_index and rebuild_index management command. - Adds a --nocommit arg to the update_index, clear_index and rebuild_index management commands. [Andrew Schoen] - Merge pull request #1103 from pkafei/master. [Chris Adams] Update documentation to reference Solr 4.x - Changed link to official archive site. [Portia Burton] - Added path to schema.xml. [Portia Burton] - Added latest version of Solr to documentation example. [Portia Burton] - Update ElasticSearch version requirements. [Chris Adams] - Elasticsearch's python api by default has _source set to False, this causes keyerror mentioned in bug #1019. [xsamurai] - Solr: clear() won’t call optimize when commit=False. [Chris Adams] An optimize will trigger a commit implicitly so we’ll avoid calling it when the user has requested not to commit - Bumped __version__ (closes #1112) [Dan Watson] - Travis: allow PyPy builds to fail. [Chris Adams] This is currently unstable and it's not a first-class supported platform yet - Tests: fix Solr server tarball test. [Chris Adams] On a clean Travis instance, the tarball won't exist - Tests: have Solr test server startup script purge corrupt tarballs. [Chris Adams] This avoids tests failing if a partial download is cached by Travis - Merge pull request #1084 from streeter/admin-mixin. 
[Daniel Lindsley] Document and add an admin mixin - Document support for searching in the Django admin. [Chris Streeter] - Add some spacing. [Chris Streeter] - Create an admin mixin for external use. [Chris Streeter] There are cases where one might have a different base admin class, and wants to use the search features in the admin as well. Creating a mixin makes this a bit cleaner. v2.3.1 (2014-09-22) ------------------- - V2.3.1. [Chris Adams] - Tolerate non-importable apps like django-debug-toolbar. [Chris Adams] If your installed app isn't even a valid Python module, haystack will issue a warning but continue. Thanks to @gojomo for the patch Closes #1074 Closes #1075 - Allow apps without models.py on Django <1.7. [Chris Adams] This wasn't officially supported by Django prior to 1.7 but is used by some third-party apps such as Grappelli This commit adds a somewhat contrived test app to avoid future regressions by ensuring that the test suite always has an application installed which does not have models.py See #1073 v2.3.0 (2014-09-19) ------------------- - Travis: Enable IRC notifications. [Chris Adams] - Fix app loading call signature. [Chris Adams] Updated code from #1016 to ensure that get_models always returns a list (previously on Django 1.7 it would return the bare model when called with an argument of the form `app.modelname`) Add some basic tests - App loading: use ImproperlyConfigured for bogus app names. [Chris Adams] This never worked but we’ll be more consistent and return ImproperlyConfigured instead of a generic LookupError - App Loading: don’t suppress app-registry related exceptions. [Chris Adams] This is just asking for trouble in the future. If someone comes up with an edge case, we should add a test for it - Remove Django version pin from install_requires. [Chris Adams] - Django 1.7 support for app discovery. 
[Chris Adams] * Refactored @Xaroth’s patch from #1015 into a separate utils module * PEP-8 cleanup - Start the process of updating for v2.3 release. [Chris Adams] - Django 1.7 compatibility for model loading. [Chris Adams] This refactors the previous use of model _meta.module_name and updates the tests so the previous change can be tested safely. Closes #981 Closes #982 - Update tox Django version pins. [Chris Adams] - Mark expected failures for Django 1.7 (see #1069) [Chris Adams] - Django 1.7: ensure that the app registry is ready before tests are loaded. [Chris Adams] The remaining test failures are due to some of the oddities in model mocking, which can be solved by overhauling the way we do tests and mocks. - Tests: Whoosh test overhaul. [Chris Adams] * Move repetitive filesystem reset logic into WhooshTestCase which cleans up after itself * Use mkdtemp instead of littering up the current directory with a 'tmp' subdirectory * Use skipIf rather than expectFailure on test_writable to disable it only when STORAGE=ram rather than always - Unpin elasticsearch library version for testing. [Chris Adams] - Tests: add MIDDLEWARE_CLASSES for Django 1.7. [Chris Adams] - Use get_model_ct_tuple to generate template name. [Chris Adams] - Refactor simple_backend to use get_model_ct_tuple. [Chris Adams] - Haystack admin: refactor to use get_model_ct_tuple. [Chris Adams] - Consolidate model meta references to use get_model_ct (see #981) [Chris Adams] This use of a semi-public Django interface will break in Django 1.7 and we can start preparing by using the existing haystack.utils.get_model_ct function instead of directly accessing it everywhere. - Refactor get_model_ct to handle Django 1.7, add tuple version. [Chris Adams] We have a mix of model _meta access which usually expects strings but in a few places needs raw values. 
This change adds support for Django 1.7 (see https://code.djangoproject.com/ticket/19689) and allows raw tuple access to handle other needs in the codebase - Add Django 1.7 warning to Sphinx docs as well. [Chris Adams] v2.2.1 (2014-09-03) ------------------- - Mark 2.2.X as incompatible with Django 1.7. [Chris Adams] - Tests: don't suppress Solr stderr logging. [Chris Adams] This will make easier to tell why Solr sometimes goes away on Travis - Update Travis & Tox config. [Chris Adams] * Tox: wait for Solr to start before running tests * Travis: allow solr & pip downloads to be cached * Travis now uses start-solr-test-server.sh instead of travis-solr * Test Solr configuration uses port 9001 universally as per the documentation * Change start-solr-test-server.sh to change into its containing directory, which also allows us to remove the realpath dependency * Test Solr invocation matches pysolr * Use get-solr-download-url script to pick a faster mirror * Upgrade to Solr 4.7.2 - Travis, Tox: add Django 1.7 targets. [Chris Adams] - Merge pull request #1055 from andreif/feature/realpath-fallback-osx. [Chris Adams] - Fallback to pwd if realpath is not available. [Andrei Fokau] - Merge pull request #1053 from gandalfar/patch-1. [Chris Adams] - Update example for Faceting to reference page.object_list. [Jure Cuhalev] Instead of `results` - ref #1052 - Add PyPy targets to Tox & Travis. [Chris Adams] Closes #1049 - Merge pull request #1044 from areski/patch-1. [Chris Adams] Update Xapian install instructions (thanks @areski) - Update Xapian install. [Areski Belaid] - Docs: fix signal processors link in searchindex_api. [Chris Adams] Correct a typo in b676b17dbc4b29275a019417e7f19f531740f05e - Merge pull request #1050 from jogwen/patch-2. [Chris Adams] - Link to 'signal processors' [Joanna Paulger] - Merge pull request #1047 from g3rd/patch-1. [Chris Adams] Update the installing search engine documentation URL (thanks @g3rd) - Fixed the installing search engine doc URL. 
[Chad Shrock] - Merge pull request #1025 from reinout/patch-1. [Chris Adams] Fixed typo in templatetag docs example (thanks to @reinout) - Fixed typo in example. [Reinout van Rees] It should be `css_class` in the template tag example instead of just `class`. (It is mentioned correctly in the syntax line earlier). v2.2.0 (2014-08-03) ------------------- - Release v2.2.0. [Chris Adams] - Test refactor - merge all the tests into one test suite (closes #951) [Chris Adams] Major refactor by @honzakral which stabilized the test suite, makes it easier to run and add new tests and somewhat faster, too. * Merged all the tests * Mark tests as skipped when a backend is not available (e.g. no ElasticSearch or Solr connection) * Massively simplified test runner (``python setup.py test``) Minor updates: * Travis: - Test Python 3.4 - Use Solr 4.6.1 * Simplified legacy test code which can now be replaced by the test utilities in newer versions of Django * Update ElasticSearch client & tests for ES 1.0+ * Add option for SearchModelAdmin to specify the haystack connection to use * Fixed a bug with RelatedSearchQuerySet caching using multiple instances (429d234) - RelatedSearchQuerySet: move class globals to instance properties. [Chris Adams] This caused obvious failures in the test suite and presumably elsewhere when multiple RelatedSearchQuerySet instances were in use - Merge pull request #1032 from maikhoepfel/patch-1. [Justin Caratzas] Drop unused variable when post-processing results - Drop unused variable when post-processing results. [Maik Hoepfel] original_results is not used in either method, and can be safely removed. - 404 when initially retrieving mappings is ok. [Honza Král] - Ignore 400 (index already exists) when creating an index in Elasticsearch. [Honza Král] - ElasticSearch: update clear() for 1.x+ syntax. 
[Chris Adams] As per http://www.elasticsearch.org/guide/en/elasticsearch/reference/1.x/docs-delete-by-query.html this should be nested inside a top-level query block: {“query”: {“query_string”: …}} - Add setup.cfg for common linters. [Chris Adams] - ElasticSearch: avoid KeyError for empty spelling. [Chris Adams] It was possible to get a KeyError when spelling suggestions were requested but no suggestions are returned by the backend. Thanks to Steven Skoczen (@skoczen) for the patch - Merge pull request #970 from tobych/patch-3. [Justin Caratzas] Improve punctuation in super-scary YMMV warning - Improve punctuation in super-scary YMMV warning. [Toby Champion] - Merge pull request #969 from tobych/patch-2. [Justin Caratzas] Fix typo; clarify purpose of search template - Fix typo; clarify purpose of search template. [Toby Champion] - Merge pull request #968 from tobych/patch-1. [Justin Caratzas] Fix possessive "its" in tutorial.rst - Fix possessive "its" [Toby Champion] - Merge pull request #938 from Mbosco/patch-1. [Daniel Lindsley] Update tutorial.rst - Update tutorial.rst. [BoscoMW] - Fix logging call in SQS post_process_results (see #648) [Chris Adams] This was used in an except: handler and would only be executed when a load_all() queryset retrieved a model which wasn't registered with the index. - Merge pull request #946 from gkaplan/spatial-docs-fix. [Daniel Lindsley] Small docs fix for spatial search example code - Fix typo with instantiating Distance units. [Graham Kaplan] - Solr backend: correct usage of pysolr delete. 
[Chris Adams] We use HAYSTACK_ID_FIELD in other places but the value passed to pysolr's delete() method must use the keyword argument ``id``: https://github.com/toastdriven/pysolr/blob/v3.1.0/pysolr.py#L756 Although the value is passed to Solr as an XML tag named ``<id>`` it will always be checked against the actual ``uniqueKey`` field even if it uses a custom name: https://wiki.apache.org/solr/UpdateXmlMessages#A.22delete.22_documents_by_ID_and_by_Query Closes #943 - Add a note on elasticsearch-py versioning with regards to 1.0. [Honza Král] - Ignore 404 when removing a document from elasticsearch. [Honza Král] Fixes #942 - Ignore missing index during .clear() [Honza Král] 404 in indices.delete can only mean that the index is not there, no issue for a delete operation Fixes #647 - Tests: remove legacy targets. [Chris Adams] * Django 1.4 is no longer supported as per the documentation * Travis: use Python 3.3 targets instead of 3.2 - Tests: update pysolr requirement to 3.1.1. [Chris Adams] 3.1.1 shipped a fix for a change in the Solr response format for the content extraction handler - Merge pull request #888 from acdha/888-solr-field-list-regression. [Chris Adams] Solr / ElasticSearch backends: restore run() kwargs handling This fixes an earlier regression which did not break functionality but made `.values()` and `.values_list()` much less of an optimization than intended. #925 will be a more comprehensive refactor but this is enough of a performance win to be worth including if a point release happens before #925 lands. - ElasticSearch backend: run() kwargs are passed directly to search backend. [Chris Adams] This allows customization by subclasses and also fixes #888 by ensuring that the custom field list prepared by `ValuesQuerySet` and `ValuesListQuerySet` is actually used. - Solr backend: run() kwargs are passed directly to search backend. 
[Chris Adams] This allows customization by subclasses and also fixes #888 by ensuring that the custom field list prepared by `ValuesQuerySet` and `ValuesListQuerySet` is actually used. - Tests: skip Solr content extraction with old PySolr. [Chris Adams] Until pysolr 3.1.1 ships there's no point in running the Solr content extraction tests because they'll fail: https://github.com/toastdriven/pysolr/pull/104 - Make sure DJANGO_CT and DJANGO_ID fields are not analyzed. [Honza Král] - No need to store fields separately in elasticsearch. [Honza Král] That will just lead to fields being stored once - as part of _source as well as in separate index that would never be used by haystack (would be used only in special cases when requesting just that field, which can be, with minimal overhead, still just extracted from the _source as it is). - Remove extra code. [Honza Král] - Simplify mappings for elasticsearch fields. [Honza Král] - don't specify defaults (index:analyzed for strings, boost: 1.0) - omit extra settings that have little or negative effects (term_vector:with_positions_offsets) - only use type-specific settings (not_analyzed makes no sense for non-string types) Fixes #866 - Add narrow queries as individual subfilter to promote caching. [Honza Král] Each narrow query will be cached individually which means more cache reuse - Doc formatting fix. [Honza Král] - Allow users to pass in additional kwargs to Solr and Elasticsearch backends. [Honza Král] Fixes #674, #862 - Whoosh: allow multiple order_by() fields. [Chris Adams] The Whoosh backend previously prevented the use of more than one order_by field. It now allows multiple fields as long as every field uses the same sort direction. Thanks to @qris, @overflow for the patch Closes #627 Closes #919 - Fix bounding box calculation for spatial queries (closes #718) [Chris Adams] Thanks @jasisz for the fix - Docs: fix ReST syntax error in searchqueryset_api.rst. 
[Chris Adams] - Tests: update test_more_like_this for Solr 4.6. [Chris Adams] - Tests: update test_quotes_regression exception test. [Chris Adams] This was previously relying on the assumption that a query would not match, which is Solr version dependent, rather than simply confirming that no exception is raised - Tests: update Solr schema to match current build_solr_schema. [Chris Adams] * Added fields used in spatial tests: location, username, comment * Updated schema for recent Solr * Ran `xmllint --c14n "$*" | xmllint --format --encode "utf-8" -` - Tests: update requirements to match tox. [Chris Adams] - Move test Solr instructions into a script. [Chris Adams] These will just rot horribly if they're not actually executed on a regular basis… - Merge pull request #907 from gam-phon/patch-1. [Chris Adams] - Fix url for solr 3.5.0. [Yaser Alraddadi] - Merge pull request #775 from stefanw/avoid-pks-seen-on-update. [Justin Caratzas] Avoid unnecessary, potentially huge db query on index update - Merge branch 'master' into avoid-pks-seen-on-update. [Stefan Wehrmeyer] Change smart_text into smart_bytes as in master Conflicts: haystack/management/commands/update_index.py - Upgraded python3 in tox to 3.3. [justin caratzas] 3.3 is a better target for haystack than 3.2, due to PEP414 - Merge pull request #885 from HonzaKral/elasticsearch-py. [Justin Caratzas] Use elasticsearch-py instead of pyelasticsearch. - Use elasticsearch-py instead of pyelasticsearch. [Honza Král] elasticsearch-py is the official Python client for Elasticsearch. - Merge pull request #899 from acdha/html5-input-type=search. [Justin Caratzas] Search form - Use HTML5 (closes #899) [Chris Adams] - Update travis config so that unit tests will run with latest solr + elasticsearch. [justin caratzas] - Merge remote-tracking branch 'HonzaKral/filtered_queries' Fixes #886. [Daniel Lindsley] - Use terms filter for DJANGO_CT, *much* faster. [Honza Král] - Cleaner query composition when it comes to filters in ES. 
[Honza Král] - Fixed typo in AUTHORS. [justin caratzas] - Added pabluk to AUTHORS. [Pablo SEMINARIO] - Fixed ValueError exception when SILENTLY_FAIL=True. [Pablo SEMINARIO] - Merge pull request #882 from benspaulding/docs/issue-607. [Justin Caratzas] Remove bit about SearchQuerySet.load_all_queryset deprecation - Remove bit about SearchQuerySet.load_all_queryset deprecation. [Ben Spaulding] That method was entirely removed in commit b8048dc0e9e3. Closes #607. Thanks to @bradleyayers for the report. - Merge pull request #881 from benspaulding/docs/issue-606. [Justin Caratzas] Fix documentation regarding ModelSearchIndex to match current behavior - Fix documentation regarding ModelSearchIndex to match current behavior. [Ben Spaulding] Closes #606. Thanks to @bradleyayers for the report. - Fixed #575 & #838, where a change in Whoosh 2.5> required explicitly setting the Searcher.search() limit to None to restore correct results. [Keryn Knight] Thanks to scenable and Shige Abe (typeshige) for the initial reports, and to scenable for finding the root issue in Whoosh. - Removed python 1.4 / python 3.2 tox env because thats not possible. [justin caratzas] also pinned versions of requirements for testing - Added test for autocomplete whitespace fix. [justin caratzas] - Fixed autocomplete() method: spaces in query. [Ivan Virabyan] - Fixed basepython for tox envs, thanks --showconfig. [justin caratzas] also, added latest django 1.4 release, which doesn't error out currently. Downgraded python3.3 to python3.2, as thats what the lastest debian stable includes. I'm working on compiling pypy and python3.3 on the test box, so those will probably be re-added as time allows. failing tests: still solr context extraction + spatial - Fixed simple backend for django 1.6, _fields was removed. [justin caratzas] - [tox] run tests for 1.6, fix test modules so they are found by the new test runner. [justin caratzas] These changes are backwards-compatible with django 1.5. 
As of this commit, the only failing tests are the Solr extraction test, and the spatial tests. - Switch solr configs to solr 4. [justin caratzas] almost all tests passing, but spatial not working - Update solr schema template to fix stopwords_en.txt relocation. [Patrick Altman] Seems that in versions >3.6 and >4 stopwords_en.txt moved to a new location. This won't be backwards compatible for older versions of solr. Addresses issues #558, #560 In addition, issue #671 references this problem - Pass `using` to index_queryset for update. [bigjust] - Update tox to test pypy, py26, py27, py33, django1.5 and django1.6. [bigjust] django 1.6 doesn't actually work yet, but there are other efforts to get that working - Fixed my own spelling test case. How embarrassing. [Dan Watson] - Added a spelling test case for ElasticSearch. [Dan Watson] - More ElasticSearch test fixes. [Dan Watson] - Added some faceting tests for ElasticSearch. [Dan Watson] - Fixed ordering issues in the ElasticSearch tests. [Dan Watson] - Merge remote-tracking branch 'infoxchange/fix-elasticsearch-index- settings-reset' [Daniel Lindsley] - Test ensuring recreating the index does not remove the mapping. [Alexey Kotlyarov] - Reset backend state when deleting index. [Alexey Kotlyarov] Reset setup_complete and existing_mapping when an index is deleted. This ensures create_index is called later to restore the settings properly. - Use Django's copy of six. [Dan Watson] - Merge pull request #847 from luisbarrueco/mgmtcmd-fix. [Dan Watson] Fixed an update_index bug when using multiple connections - Fixed an update_index bug when using multiple connections. [Luis Barrueco] - Fixed a missed raw_input call on Python 3. [Dan Watson] - Merge pull request #840 from postatum/fix_issue_807. [Justin Caratzas] Fixed issue #807 - Fixed issue #807. [postatum] - Merge pull request #837 from nicholasserra/signals-docs-fix. 
[Justin Caratzas] Tiny docs fix in signal_processors example code - Tiny docs fix in signal_processors example code. [Nicholas Serra] - Merge pull request #413 from phill-tornroth/patch-1. [Justin Caratzas] Silly little change, I know.. but I actually ran into a case where I acci - Silly little change, I know.. but I actually ran into a case where I accidentally passed a list of models in without *ing them. When that happens, we get a string formatting exception (not all arguments were formatted) instead of the useful "that ain't a model, kid" business. [Phill Tornroth] - Merge pull request #407 from bmihelac/patch-1. [Justin Caratzas] Fixed doc, ``query`` is context variable and not in request. - Fixed doc, ``query`` is context variable and not in request. [bmihelac] - Merge pull request #795 from davesque/update_excluded_indexes_error_message. [Justin Caratzas] Improve error message for duplicate index classes - Improve error message for duplicate index classes. [David Sanders] To my knowledge, the 'HAYSTACK_EXCLUDED_INDEXES' setting is no longer used. - Started the v2.1.1 work. [Daniel Lindsley] - Avoid unnecessary db query on index update. [Stefan Wehrmeyer] pks_seen is only needed if objects are removed from index, so only compute it if necessary. Improve pks_seen to not build an intermediary list. v2.1.0 (2013-07-28) ------------------- - Bumped to v2.1.0! [Daniel Lindsley] - Python 3 support is done, thanks to RevSys & the PSF! Updated requirements in the docs. [Daniel Lindsley] - Added all the new additions to AUTHORS. [Daniel Lindsley] - Merge branch 'py3' [Daniel Lindsley] - Added Python 3 compatibility notes. [Daniel Lindsley] - Whoosh mostly working under Python 3. See docs for details. [Daniel Lindsley] - Backported things removed from Django 1.6. [Daniel Lindsley] - Final core changes. [Daniel Lindsley] - Solr tests all but passing under Py3. [Daniel Lindsley] - Elasticsearch tests passing under Python 3. 
[Daniel Lindsley] Requires git master (ES 1.0.0 beta) to work properly when using suggestions. - Overrides passing under Py3. [Daniel Lindsley] - Simple backend ported & passing. [Daniel Lindsley] - Whoosh all but fully working under Python 3. [Daniel Lindsley] - Closer on porting ES. [Daniel Lindsley] - Core tests mostly pass on Py 3. \o/ [Daniel Lindsley] What's left are 3 failures, all ordering issues, where the correct output is present, but ordering is different between Py2 / Py3. - More porting to Py3. [Daniel Lindsley] - Started porting to py3. [Daniel Lindsley] - Merge pull request #821 from knightzero/patch-1. [Justin Caratzas] Update autocomplete.rst - Update autocomplete.rst. [knightzero] - Merge pull request #744 from trigger-corp/master. [Justin Caratzas] Allow for document boosting with elasticsearch - Update the current elasticsearch boost test to also test document boosting. [Connor Dunn] - Map boost field to _boost in elasticsearch. [Connor Dunn] Means that including a boost field in a document will cause document level boosting. - Added ethurgood to AUTHORS. [Daniel Lindsley] - Add test__to_python for elasticsearch backend. [Eric Thurgood] - Fix datetime instantiation in elasticsearch backend's _to_python. [Eric Thurgood] - Merge pull request #810 from pabluk/minor-docs-fix. [Chris Adams] Updated description for TIMEOUT setting - thanks @pabluk - Updated description for TIMEOUT setting. [Pablo SEMINARIO] - Updated the backend support docs. Thanks to kezabelle & dimiro1 for the report! [Daniel Lindsley] - Added haystack-rqueue to "Other Apps". [Daniel Lindsley] - Updated README & index. [Daniel Lindsley] - Added installation instructions. [bigjust] - Merge pull request #556 from h3/master. [Justin Caratzas] Updated to 'xapian_backend.XapianEngine' docs & example - Updated XapianEngine module path. [h3] - Updated XapianEngine module path. [h3] - Merge pull request #660 from seldon/master. 
[Justin Caratzas] Some minor docs fixes - Fixed a few typos in docs. [Lorenzo Franceschini] - Add Educreations to who uses Haystack. [bigjust] - Merge pull request #692 from stephenpaulger/master. [Justin Caratzas] Change the README link to latest 1.2 release. - Update README.rst. [Stephen Paulger] Update 1.2.6 link to 1.2.7 - Merge pull request #714 from miracle2k/patch-1. [Justin Caratzas] Note enabling INCLUDE_SPELLING requires a reindex. - Note enabling INCLUDE_SPELLING requires a reindex. [Michael Elsdörfer] - Unicode support in SimpleSearchQuery (closes #793) [slollo] - Merge pull request #790 from andrewschoen/feature/haystack-identifier- module. [Andrew Schoen] Added a new setting, HAYSTACK_IDENTIFIER_METHOD, which will allow a cust... - Added a new setting, ``HAYSTACK_IDENTIFIER_METHOD``, which will allow a custom method to be provided for ``haystack.utils.get_identifier``. [Schoen] - Fixed an exception log message in elasticsearch backend, and added a loading test for elasticsearch. [Dan Watson] - Changed exception log message in whoosh backend to use __class__.__name__ instead of just __name__ (closes #641) [Jeffrey Tratner] - Further bumped the docs on installing engines. [Daniel Lindsley] - Update docs/installing_search_engines.rst. [Tom Dyson] grammar, Elasticsearch version and formatting consistency fixes. - Added GroundCity & Docket Alarm to the Who Uses docs. [Daniel Lindsley] - Started the development on v2.0.1. [Daniel Lindsley] v2.0.0 (2013-05-12) ------------------- - Bumped to v2.0.0! [Daniel Lindsley] - Changed how ``Raw`` inputs are handled. Thanks to kylemacfarlane for the (really good) report. [Daniel Lindsley] - Added a (passing) test trying to verify #545. [Daniel Lindsley] - Fixed a doc example on custom forms. Thanks to GrivIN and benspaulding for patches. [Daniel Lindsley] - Added a reserved character for Solr (v4+ supports regexes). Thanks to RealBigB for the initial patch. 
[Daniel Lindsley] - Merge branch 'master' of github.com:toastdriven/django-haystack. [Jannis Leidel] - Fixed the stats tests. [Daniel Lindsley] - Adding description of stats support to docs. [Ranjit Chacko] - Adding support for stats queries in Solr. [Ranjit Chacko] - Added tests for the previous kwargs patch. [Daniel Lindsley] - Bug fix to allow object removal without a commit. [Madan Thangavelu] - Do not refresh the index after it has been deleted. [Kevin Tran] - Fixed naming of manager for consistency. [Jannis Leidel] - renamed `HaystackManager` to `SearchIndexManager` - renamed `get_query_set` to `get_search_queryset` - Updated the docs on running tests. [Daniel Lindsley] - Merge branch 'madan' [Daniel Lindsley] - Fixed the case where index_name isn't available. [Daniel Lindsley] - Fixing typo to allow manager to switch between different index_labels. [Madan Thangavelu] - Haystack manager and tests. [Madan Thangavelu] - Removing unwanted spaces. [Madan Thangavelu] - Object query manager for searchindex. [Madan Thangavelu] - Added requirements file for testing. [Daniel Lindsley] - Added a unit test for #786. [Dan Watson] - Fixed a bug when passing "using" to SearchQuerySet (closes #786). [Rohan Gupta] - Ignore the env directory. [Daniel Lindsley] - Allow for setuptools as well as distutils. [Daniel Lindsley] - Merge pull request #785 from mattdeboard/dev-mailing-list. [Chris Adams] Add note directing users to django-haystack-dev mailing list. - Add note directing users to django-haystack-dev mailing list. [Matt DeBoard] - Spelling suggestions for ElasticSearch (closes #769 and #747) [Dan Watson] - Added support for sending facet options to the backend (closes #753) [Dan Watson] - More_like_this: honor .models() restriction. [Chris Adams] Original patch by @mattdeboard updated to remove test drift since it was originally submitted Closes #593 Closes #543 - Removed commercial support info. 
[Daniel Lindsley] - Merge pull request #779 from pombredanne/pep386_docfixes. [Jannis Leidel] Update version to 2.0.0b0 in doc conf - Update version to 2.0.0b0 in doc conf .. to redeem myself of the unlucky #777 minimess. [pombredanne] - Merge pull request #778 from falinsky/patch-1. [Justin Caratzas] Fix bug in setup.py - Fix bug. [Sergey Falinsky] - Merge pull request #777 from pombredanne/patch-1. [Justin Caratzas] Update version to be a PEP386 strict with a minor qualifier of 0 for now... - Update version to be a PEP386 strict with a minor qualifier of 0 for now. [pombredanne] This version becomes a "strict" version under PEP386 and should be recognized by install/packaging tools (such as distribute/distutils/setuptools) as newer than 2.0.0-beta. This will also help making small increments of the version which brings some sanity when using an update from HEAD and ensure that things will upgrade alright. - Update_index: display Unicode model names (closes #767) [Chris Adams] The model's verbose_name_plural value is included as Unicode but under Python 2.x the progress message it was included in was a regular byte-string. Now it's correctly handled as Unicode throughout. - Merge pull request #731 from adityar7/master. [Jannis Leidel] Setup custom routers before settings up signal processor. - Setup custom routers before settings up signal processor. [Aditya Rajgarhia] Fixes https://github.com/toastdriven/django-haystack/issues/727 - Port the `from_python` method from pyelasticsearch to the Elasticsearch backend, similar to `to_python` in 181bbc2c010a135b536e4d1f7a1c5ae4c63e33db. [Jannis Leidel] Fixes #762. Refs #759. - Merge pull request #761 from stefanw/simple-models-filter. [Justin Caratzas] Make models filter work on simple backend - Make model filter for simple backend work. [Stefan Wehrmeyer] Adds Stefan Wehrmeyer to AUTHORS for patch - Merge pull request #746 from lazerscience/fix-update-index-output. 
[Justin Caratzas] Using force_text for indexing message - Replacing `force_text` with `force_unicode`. #746. [Bernhard Vallant] - Using force_text for indexing message. [Bernhard Vallant] verbose_name_plural may be a functional proxy object from ugettext_lazy, it should be forced to be a string! - Support pyelasticsearch 0.4 change (closes #759) [Chris Adams] pyelasticsearch 0.4 removed the `to_python` method Haystack used. Thanks to @erikrose for the quick patch - Merge pull request #755 from toastdriven/issue/754-doc-build-warning. [Chris Adams] - Add preceding dots to hyperlink target; fixes issue 754. [Ben Spaulding] This error was introduced in commit faacbcb. - Merge pull request #752 from bigjust/master. [Justin Caratzas] Fix Simple Score field collision - Simple: Fix bug in score field collision. [bigjust] Previous commit 0a9c919 broke the simple backend for models that didn't have an indexed score field. Added a test to cover regression. - Set zip_safe in setup.py to prevent egg creation. [Jannis Leidel] This is a work around for a bug in Django that prevents detection of management commands embedded in packages installed as setuptools eggs. - Merge pull request #740 from acdha/simplify-search-view-name-property. [Chris Adams] Remove redundant __name__ assignment on SearchView - Remove redundant __name__ assignment on SearchView. [Chris Adams] __name__ was being explicitly set to a value which was the same as the default value. Additionally corrected the obsolete __name__ method declaration in the documentation which reflected the code prior to SHA:89d8096 in 2010. - Merge pull request #698 from gjb83/master. [Chris Adams] Fixed deprecation warning for url imports on Django 1.3 Thanks to @gjb83 for the patch. - Removed star imports. [gjb83] - Maintain Django 1.3 compatibility. [gjb83] - Fixed deprecation warning. [gjb83] django.conf.urls.defaults is now deprecated. Use django.conf.urls instead. - Merge pull request #743 from bigjust/solr-managementcmd-fix. 
[Justin Caratzas] Solr build_solr_schema: fixed a bug in build_solr_schema. Thanks to mjum... - Solr build_solr_schema: fixed a bug in build_solr_schema. Thanks to mjumbewu for the report! [Justin Caratzas] If you tried to run build_solr_schema with a backend that supports schema building, but was not Solr (like Whoosh), then you would get an invalid schema. This fix raises the ImproperlyConfigured exception with a proper message. - Merge pull request #742 from bigjust/simple-backend-score-fix. [Justin Caratzas] - Simple: removed conflicting score field from raw result objects. [Justin Caratzas] This keeps consistency with the Solr backend, which resolves this conflict in the same manner. - ElasticSearch: fix AltParser test. [Chris Adams] AltParser queries are still broken but that functionality has only been listed as supported on Solr. - Better Solr AltParser quoting (closes #730) [Chris Adams] Previously the Solr AltParser implementation embedded the search term as an attribute inside the {!…} construct, which required it to be doubly escaped. This change contributed by @ivirabyan moves the value outside the query, requiring only our normal quoting: q=(_query_:"{!edismax}Assassin's Creed") instead of: q=(_query_:"{!edismax v='Assassin's Creed'}") Thanks @ivirabyan for the patch! - Solr: use nested query syntax for AltParser queries. [Chris Adams] The previous implementation would, given a query like this:: sqs.filter(content=AltParser('dismax', 'library', qf="title^2 text" mm=1)) generate a query like this:: {!dismax v=library qf="title^2 text" mm=1} This works in certain situations but causes Solr to choke while parsing it when Haystack wraps this term in parentheses:: org.apache.lucene.queryParser.ParseException: Cannot parse '({!dismax mm=1 qf='title^2 text institution^0.8' v=library})': Encountered " <RANGEEX_GOOP> "qf=\'title^1.25 "" at line 1, column 16. 
The solution is to use the nested query syntax described here: http://searchhub.org/2009/03/31/nested-queries-in-solr/ This will produce a query like this, which works with Solr 3.6.2:: (_query_:"{!edismax mm=1 qf='title^1.5 text institution^0.5' v=library}") Leaving the actual URL query string looking like this:: q=%28_query_%3A%22%7B%21edismax+mm%3D1+qf%3D%27title%5E1.5+text+institution%5E0.5%27+v%3Dlibrary%7D%22%29 * Tests updated for the new query generation output * A Solr backend task was added to actually run the dismax queries and verify that we're not getting Solr 400s errors due to syntax gremlins - Pass active backend to index queryset calls (closes #534) [Chris Adams] Now the Index index_queryset() and read_queryset() methods will be called with the active backend name so they can optionally perform backend-specific filtering. This is extremely useful when using something like Solr cores to maintain language specific backends, allowing an Index to select the appropriate documents for each language:: def index_queryset(self, using=None): return Post.objects.filter(language=using) Changes: * clear_index, update_index and rebuild_index all default to processing *every* backend. ``--using`` may now be provided multiple times to select a subset of the configured backends. * Added examples to the Multiple Index documentation page - Because Windows. [Daniel Lindsley] - Fixed the docs on debugging to cover v2. Thanks to eltesttox for the report. [Daniel Lindsley] - That second colon matters. [Daniel Lindsley] - Further docs on autocomplete. [Daniel Lindsley] - Fixed the imports that would stomp on each other. [Daniel Lindsley] Thanks to codeinthehole, Attorney-Fee & imacleod for pointing this out. - BACKWARD-INCOMPATIBLE: Removed ``RealTimeSearchIndex`` in favor of ``SignalProcessors``. [Daniel Lindsley] This only affects people who were using ``RealTimeSearchIndex`` (or a queuing variant) to perform near real-time updates. 
Those users should refer to the Migration documentation. - Updated ignores. [Daniel Lindsley] - Merge pull request #552 from hadesgames/master. [Jannis Leidel] Fixes process leak when using update_index with workers. - Fixed update_index process leak. [Tache Alexandru] - Merge branch 'master' of github.com:toastdriven/django-haystack. [Jannis Leidel] - Merge pull request #682 from acdha/682-update_index-tz-support. [Chris Adams] update_index should use non-naive datetime when settings.USE_TZ=True - Tests for update_index timezone support. [Chris Adams] * Confirm that update_index --age uses the Django timezone-aware now support function * Skip this test on Django 1.3 - Update_index: use tz-aware datetime where applicable. [Chris Adams] This will allow Django 1.4 users with USE_TZ=True to use update_index with time windowing as expected - otherwise the timezone offset needs to be manually included in the value passed to -a - Tests: mark expected failures in Whoosh suite. [Chris Adams] This avoids making it painful to run the test suite and flags the tests which need attention - Tests: mark expected failures in ElasticSearch suite. [Chris Adams] This avoids making it painful to run the test suite and flags the tests which need attention - Multiple index tests: correct handling of Whoosh teardown. [Chris Adams] We can't remove the Whoosh directory per-test - only after every test has run… - Whoosh tests: use a unique tempdir. [Chris Adams] This ensures that there's no way for results to persist across runs and lets the OS clean up the mess if we fail catastrophically The multiindex and regular whoosh tests will have different prefixes to ease debugging - Merge pull request #699 from acdha/tox-multiple-django-versions. [Chris Adams] Minor tox.ini & test runner tidying - Test runner: set exit codes on failure. [Chris Adams] - Tox: refactor envlist to include Django versions. 
[Chris Adams] * Expanded base dependencies * Set TEST_RUNNER_ARGS=-v0 to reduce console noise * Add permutations of python 2.5, 2.6, 2.7 and django 1.3 and 1.4 - Test runner: add $TEST_RUNNER_ARGS env. variable. [Chris Adams] This allows you to export TEST_RUNNER_ARGS=-v0 to affect all 9 invocations - Tox: store downloads in tmpdir. [Chris Adams] - Be a bit more careful when resetting connections in the multiprocessing updater. Fixes #562. [Jannis Leidel] - Fixed distance handling in result parser of the elasticsearch backend. This is basically the second part of #566. Thanks to Josh Drake for the initial patch. [Jannis Leidel] - Merge pull request #670 from dhan88/master. [Jannis Leidel] Elasticsearch backend using incorrect coordinates for geo_bounding_box (within) filter - Elasticsearch geo_bounding_box filter expects top_left (northwest) and bottom_right (southeast). Haystack's elasticsearch backend is passing northeast and southwest coordinates instead. [Danny Han] - Merge pull request #666 from caioariede/master. [Jannis Leidel] Fixes incorrect call to put_mapping on ElasticSearch backend - Fixes incorrect call to put_mapping on elasticsearch backend. [Caio Ariede] - Added ericholscher to AUTHORS. [Daniel Lindsley] - Add a title for the support matrix so it's linkable. [Eric Holscher] - Tests: command-line help and coverage.py support. [Chris Adams] This makes run_all_tests.sh a little easier to use and simplifies the process of running under coverage.py Closes #683 - Tests: basic help and coverage.py support. [Chris Adams] run_all_tests.sh now supports --help and --with-coverage - Add a CONTRIBUTING.md file for Github. [Chris Adams] This is a migrated copy of docs/contributing.rst so Github can suggest it when pull requests are being created - Fix combination logic for complex queries. [Chris Adams] Previously combining querysets which used a mix of logical AND and OR operations behaved unexpectedly. 
Thanks to @mjl for the patch and tests in SHA: 9192dbd Closes #613, #617 - Added rz to AUTHORS. [Daniel Lindsley] - Fixed string joining bug in the simple backend. [Rodrigo Guzman] - Added failing test case for #438. [Daniel Lindsley] - Fix Solr more-like-this tests (closes #655) [Chris Adams] * Refactored the MLT tests to be less brittle in checking only the top 5 results without respect to slight ordering variations. * Refactored LiveSolrMoreLikeThisTestCase into multiple tests * Convert MLT templatetag tests to rely on mocks for stability and to avoid hard-coding backend assumptions, at the expense of relying completely on the backend MLT queryset-level tests to exercise that code. * Updated MLT code to always assume deferred querysets are available (introduced in Django 1.1) and removed a hard-coded internal attr check - All backends: fixed more_like_this & deferreds. [Chris Adams] Django removed the get_proxied_model helper function in the 1.3 dev cycle: https://code.djangoproject.com/ticket/17678 This change adds support for the simple new property access used by 1.3+ BACKWARD INCOMPATIBLE: Django 1.2 is no longer supported - Updated elasticsearch backend to use a newer pyelasticsearch release that features an improved API , connection pooling and better exception handling. [Jannis Leidel] - Added Gidsy to list of who uses Haystack. [Jannis Leidel] - Increased the number of terms facets returned by the Elasticsearch backend to 100 from the default 10 to work around an issue upstream. [Jannis Leidel] This is hopefully only temporary until it's fixed in Elasticsearch, see https://github.com/elasticsearch/elasticsearch/issues/1776. - Merge pull request #643 from stephenmcd/master. [Chris Adams] Fixed logging in simple_backend - Fixed logging in simple_backend. [Stephen McDonald] - Added Pitchup to Who Uses. [Daniel Lindsley] - Merge branch 'unittest2-fix' [Chris Adams] - Better unittest2 detection. 
[Chris Adams] This supports Python 2.6 and earlier by shifting the import to look towards the future name rather than the past - Merge pull request #652 from acdha/solr-content-extraction-test-fix. [Chris Adams] Fix the Solr content extraction handler tests - Add a minimal .travis.yml file to suppress build spam. [Chris Adams] Until the travis-config branch is merged in, this can be spread around to avoid wasting time running builds before we're ready - Tests: enable Solr content extraction handler. [Chris Adams] This is needed for the test_content_extraction test to pass - Tests: Solr: fail immediately on config errors. [Chris Adams] - Solr tests: clean unused imports. [Chris Adams] - Suppress console DeprecationWarnings. [Chris Adams] - Merge pull request #651 from acdha/unittest2-fix. [Chris Adams] Update unittest2 import logic so the tests can actually be run - Update unittest2 import logic. [Chris Adams] We'll try to get it from Django 1.3+ but Django 1.2 users will need to install it manually - Merge pull request #650 from bigjust/patch-1. [Chris Adams] Fix typo in docstring - Fix typo. [Justin Caratzas] - Refactor to use a dummy logger that lets you turn off logging. [Travis Swicegood] - A bunch of Solr testing cleanup. [Chris Adams] - Skip test if pysolr isn't available. [Travis Swicegood] - Updated Who Uses to correct a backend usage. [Daniel Lindsley] - Updated documentation about using the main pyelasticsearch release. [Jannis Leidel] - Merge pull request #628 from kjoconnor/patch-1. [Jannis Leidel] Missing ` - Missing ` [Kevin O'Connor] - Fixed a mostly-empty warning in the ``SearchQuerySet`` docs. Thanks to originell for the report! [Daniel Lindsley] - Fixed the "Who Uses" entry on AstroBin. [Daniel Lindsley] - Use the match_all query to speed up performing filter only queries dramatically. [Jannis Leidel] - Fixed typo in docs. Closes #612. [Jannis Leidel] - Updated link to celery-haystack repository. 
[Jannis Leidel] - Fixed the docstring of SearchQuerySet.none. Closes #435. [Jannis Leidel] - Fixed the way quoting is done in the Whoosh backend when using the ``__in`` filter. [Jason Kraus] - Added the solrconfig.xml I use for testing. [Daniel Lindsley] - Fixed typo in input types docs. Closes #551. [Jannis Leidel] - Make sure an search engine's backend isn't instantiated on every call to the backend but only once. Fixes #580. [Jannis Leidel] - Restored sorting to ES backend that was broken in d1fa95529553ef8d053308159ae4efc455e0183f. [Jannis Leidel] - Prevent spatial filters from stomping on existing filters in ElasticSearch backend. [Josh Drake] - Merge branch 'mattdeboard-sq-run-refactor' [Jannis Leidel] - Fixed an ES test that seems like a change in behavior in recent ES versions. [Jannis Leidel] - Merge branch 'sq-run-refactor' of https://github.com/mattdeboard /django-haystack into mattdeboard-sq-run-refactor. [Jannis Leidel] - Refactor Solr & ES SearchQuery subclasses to use the ``build_params`` from ``BaseSearchQuery`` to build the kwargs to be passed to the search engine. [Matt DeBoard] This refactor is made to make extending Haystack simpler. I only ran the Solr tests which invoked a ``run`` call (via ``get_results``), and those passed. I did not run the ElasticSearch tests; however, the ``run`` method for both Lucene-based search engines were identical before, and are identical now. The test I did run -- ``LiveSolrSearchQueryTestCase.test_log_query`` -- passed. - Merge branch 'master' of https://github.com/toastdriven/django- haystack. [Jannis Leidel] - Merge pull request #568 from duncm/master. [Jannis Leidel] Fix exception in SearchIndex.get_model() - Fixed ``SearchIndex.get_model()`` to raise exception instead of returning it. [Duncan Maitland] - Merge branch 'master' of https://github.com/toastdriven/django- haystack. [Jannis Leidel] - Fixed Django 1.4 compatibility. Thanks to bloodchild for the report! 
[Daniel Lindsley] - Refactored ``SearchBackend.search`` so that kwarg-generation operations are in a discrete method. [Matt DeBoard] This makes it much simpler to subclass ``SearchBackend`` (& the engine-specific variants) to add support for new parameters. - Added witten to AUTHORS. [Daniel Lindsley] - Fix for #378: Highlighter returns unexpected results if one term is found within another. [dan] - Removed jezdez's old entry in AUTHORS. [Daniel Lindsley] - Added Jannis to Primary Authors. [Daniel Lindsley] - Merge branch 'master' of github.com:jezdez/django-haystack. [Jannis Leidel] - Fixed a raise condition when using the simple backend (e.g. in tests) and changing the DEBUG setting dynamically (e.g. in integration tests). [Jannis Leidel] - Add missing `ImproperlyConfigured` import from django's exceptions. [Luis Nell] l178 failed. - Commercial support is now officially available for Haystack. [Daniel Lindsley] - Using multiple workers (and resetting the connection) causes things to break when the app is finished and it moves to the next and does qs.count() to get a count of the objects in that app to index with psycopg2 reporting a closed connection. Manually closing the connection before each iteration if using multiple workers before building the queryset fixes this issue. [Adam Fast] - Removed code leftover from v1.X. Thanks to kossovics for the report! [Daniel Lindsley] - Fixed a raise condition when using the simple backend (e.g. in tests) and changing the DEBUG setting dynamically (e.g. in integration tests). [Jannis Leidel] - All backends let individual documents fail, rather than failing whole chunks. Forward port of acdha's work on 1.2.X. [Daniel Lindsley] - Added ikks to AUTHORS. [Daniel Lindsley] - Fixed ``model_choices`` to use ``smart_unicode``. [Igor Támara] - +localwiki.org. [Philip Neustrom] - Added Pix Populi to "Who Uses". [Daniel Lindsley] - Added contribution guidelines. 
[Daniel Lindsley] - Updated the docs to reflect the supported version of Django. Thanks to catalanojuan for the original patch! [Daniel Lindsley] - Fix PYTHONPATH Export and add Elasticsearch example. [Craig Nagy] - Updated the Whoosh URL. Thanks to cbess for the original patch! [Daniel Lindsley] - Reset database connections on each process on update_index when using --workers. [Diego Búrigo Zacarão] - Moved the ``build_queryset`` method to ``SearchIndex``. [Alex Vidal] This method is used to build the queryset for indexing operations. It is copied from the build_queryset function that lived in the update_index management command. Making this change allows developers to modify the queryset used for indexing even when a date filter is necessary. See `tests/core/indexes.py` for tests. - Fixed a bug where ``Indexable`` could be mistakenly recognized as a discoverable class. Thanks to twoolie for the original patch! [Daniel Lindsley] - Fixed a bug with query construction. Thanks to dstufft for the report! [Daniel Lindsley] This goes back to erroring on the side of too many parens, where there weren't enough before. The engines will no-op them when they're not important. - Fixed a bug where South would cause Haystack to setup too soon. Thanks to adamfast for the report! [Daniel Lindsley] - Added Crate.io to "Who Uses"! [Daniel Lindsley] - Fixed a small typo in spatial docs. [Frank Wiles] - Logging: avoid forcing string interpolation. [Chris Adams] - Fixed docs on using a template for Solr schema. [Daniel Lindsley] - Add note to 'Installing Search Engines' doc explaining how to override the template used by 'build_solr_schema' [Matt DeBoard] - Better handling of ``.models``. Thanks to zbyte64 for the report & HonzaKral for the original patch! [Daniel Lindsley] - Added Honza to AUTHORS. [Daniel Lindsley] - Handle sorting for ElasticSearch better. [Honza Kral] - Update docs/backend_support.rst. 
[Issac Kelly] - Fixed a bug where it's possible to erroneously try to get spelling suggestions. Thanks to bigjust for the report! [Daniel Lindsley] - The ``dateutil`` requirement is now optional. Thanks to arthurnn for the report. [Daniel Lindsley] - Fixed docs on Solr spelling suggestion until the new Suggester support can be added. Thanks to zw0rk & many others for the report! [Daniel Lindsley] - Bumped to beta. [Daniel Lindsley] We're not there yet, but we're getting close. - Added saved-search to subproject docs. [Daniel Lindsley] - Search index discovery no longer swallows errors with reckless abandon. Thanks to denplis for the report! [Daniel Lindsley] - Elasticsearch backend officially supported. [Daniel Lindsley] All tests passing. - Back down to 3 on latest pyelasticsearch. [Daniel Lindsley] - And then there were 3 (Elasticsearch test failures). [Daniel Lindsley] - Solr tests now run faster. [Daniel Lindsley] - Improved the tutorial docs. Thanks to denplis for the report! [Daniel Lindsley] - Down to 9 failures on Elasticsearch. [Daniel Lindsley] - Because the wishlist has changed. [Daniel Lindsley] - A few small fixes. Thanks to robhudson for the report! [Daniel Lindsley] - Added an experimental Elasticsearch backend. [Daniel Lindsley] Tests are not yet passing but it works in basic hand-testing. Passing test coverage coming soon. - Fixed a bug related to the use of ``Exact``. [Daniel Lindsley] - Removed accidental indent. [Daniel Lindsley] - Ensure that importing fields without the GeoDjango kit doesn't cause an error. Thanks to dimamoroz for the report! [Daniel Lindsley] - Added the ability to reload a connection. [Daniel Lindsley] - Fixed ``rebuild_index`` to properly have all options available. [Daniel Lindsley] - Fixed a bug in pagination. Thanks to sgoll for the report! [Daniel Lindsley] - Added an example to the docs on what to put in ``INSTALLED_APPS``. Thanks to Dan Krol for the suggestion. 
[Daniel Lindsley] - Changed imports so the geospatial modules are only imported as needed. [Dan Loewenherz] - Better excluded index detection. [Daniel Lindsley] - Fixed a couple of small typos. [Sean Bleier] - Made sure the toolbar templates are included in the source distribution. [Jannis Leidel] - Fixed a few documentation issues. [Jannis Leidel] - Moved my contribution for the geospatial backend to a attribution of Gidsy which funded my work. [Jannis Leidel] - Small docs fix. [Daniel Lindsley] - Added input types, which enables advanced querying support. Thanks to CMGdigital for funding the development! [Daniel Lindsley] - Added geospatial search support! [Daniel Lindsley] I have anxiously waited to add this feature for almost 3 years now. Support is finally present in more than one backend & I was generously given some paid time to work on implementing this. Thanks go out to: * CMGdigital, who paid for ~50% of the development of this feature & were awesomely supportive. * Jannis Leidel (jezdez), who did the original version of this patch & was an excellent sounding board. * Adam Fast, for patiently holding my hand through some of the geospatial confusions & for helping me verify GeoDjango functionality. * Justin Bronn, for the great work he originally did on GeoDjango, which served as a point of reference/inspiration on the API. And thanks to all others who have submitted a variety of patches/pull requests/interest throughout the years trying to get this feature in place. - Added .values() / .values_list() methods, for fetching less data. Thanks to acdha for the original implementation! [Daniel Lindsley] - Reduced the number of queries Haystack has to perform in many cases (pagination/facet_counts/spelling_suggestions). Thanks to acdha for the improvements! [Daniel Lindsley] - Spruced up the layout on the new DjDT panel. [Daniel Lindsley] - Fixed compatibility with Django pre-1.4 trunk. 
* The MAX_SHOW_ALL_ALLOWED variable is no longer available, and hence causes an ImportError with Django versions higher 1.3. * The "list_max_show_all" attribute on the ChangeList object is used instead. * This patch maintains compatibility with Django 1.3 and lower by trying to import the MAX_SHOW_ALL_ALLOWED variable first. [Aram Dulyan] - Updated ``setup.py`` for the new panel bits. [Daniel Lindsley] - Added a basic DjDT panel for Haystack. Thanks to robhudson for planting the seed that Haystack should bundle this! [Daniel Lindsley] - Added the ability to specify apps or individual models to ``update_index``. Thanks to CMGdigital for funding this development! [Daniel Lindsley] - Added ``--start/--end`` flags to ``update_index`` to allow finer- grained control over date ranges. Thanks to CMGdigital for funding this development! [Daniel Lindsley] - I hate Python packaging. [Daniel Lindsley] - Made ``SearchIndex`` classes thread-safe. Thanks to craigds for the report & original patch. [Daniel Lindsley] - Added a couple more uses. [Daniel Lindsley] - Bumped reqs in docs for content extraction bits. [Daniel Lindsley] - Added a long description for PyPI. [Daniel Lindsley] - Solr backend support for rich-content extraction. [Chris Adams] This allows indexes to use text extracted from binary files as well as normal database content. - Fixed errant ``self.log``. [Daniel Lindsley] Thanks to terryh for the report! - Fixed a bug with index inheritance. [Daniel Lindsley] Fields would seem to not obey the MRO while method did. Thanks to ironfroggy for the report! - Fixed a long-time bug where the Whoosh backend didn't have a ``log`` attribute. [Daniel Lindsley] - Fixed a bug with Whoosh's edge n-gram support to be consistent with the implementation in the other engines. [Daniel Lindsley] - Added celery-haystack to Other Apps. [Daniel Lindsley] - Changed ``auto_query`` so it can be run on other, non-``content`` fields. 
[Daniel Lindsley] - Removed extra loops through the field list for a slight performance gain. [Daniel Lindsley] - Moved ``EXCLUDED_INDEXES`` to a per-backend setting. [Daniel Lindsley] - BACKWARD-INCOMPATIBLE: The default filter is now ``__contains`` (in place of ``__exact``). [Daniel Lindsley] If you were relying on this behavior before, simply add ``__exact`` to the fieldname. - BACKWARD-INCOMPATIBLE: All "concrete" ``SearchIndex`` classes must now mixin ``indexes.Indexable`` as well in order to be included in the index. [Daniel Lindsley] - Added tox to the mix. [Daniel Lindsley] - Allow for less configuration. Thanks to jeromer & cyberdelia for the reports! [Daniel Lindsley] - Fixed up the management commands to show the right alias & use the default better. Thanks to jeromer for the report! [Daniel Lindsley] - Fixed a bug where signals wouldn't get setup properly, especially on ``RealTimeSearchIndex``. Thanks to byoungb for the report! [Daniel Lindsley] - Fixed formatting in the tutorial. [Daniel Lindsley] - Removed outdated warning about padding numeric fields. Thanks to mchaput for pointing this out! [Daniel Lindsley] - Added a silent failure option to prevent Haystack from suppressing some failures. [Daniel Lindsley] This option defaults to ``True`` for compatibility & to prevent cases where lost connections can break reindexes/searches. - Fixed the simple backend to not throw an exception when handed an ``SQ``. Thanks to diegobz for the report! [Daniel Lindsley] - Whoosh now supports More Like This! Requires Whoosh 1.8.4. [Daniel Lindsley] - Deprecated ``get_queryset`` & fixed how indexing happens. Thanks to Craig de Stigter & others for the report! [Daniel Lindsley] - Fixed a bug where ``RealTimeSearchIndex`` was erroneously included in index discovery. Thanks to dedsm for the report & original patch! [Daniel Lindsley] - Added Vickery to "Who Uses". [Daniel Lindsley] - Require Whoosh 1.8.3+. It's for your own good. 
[Daniel Lindsley] - Added multiprocessing support to ``update_index``! Thanks to CMGdigital for funding development of this feature. [Daniel Lindsley] - Fixed a bug where ``set`` couldn't be used with ``__in``. Thanks to Kronuz for the report! [Daniel Lindsley] - Added a ``DecimalField``. [Daniel Lindsley] - Fixed a bug where a different style of import could confuse the collection of indexes. Thanks to groovecoder for the report. [Daniel Lindsley] - Fixed a typo in the autocomplete docs. Thanks to anderso for the catch! [Daniel Lindsley] - Fixed a backward-incompatible query syntax change Whoosh introduced between 1.6.1 & 1.6.2 that causes only one model to appear as though it is indexed. [Daniel Lindsley] - Updated AUTHORS to reflect the Kent's involvement in multiple index support. [Daniel Lindsley] - BACKWARD-INCOMPATIBLE: Added multiple index support to Haystack, which enables you to talk to more than one search engine in the same codebase. Thanks to: [Daniel Lindsley] * Kent Gormat for funding the development of this feature. * alex, freakboy3742 & all the others who contributed to Django's multidb feature, on which much of this was based. * acdha for inspiration & feedback. * dcramer for inspiration & feedback. * mcroydon for patch review & docs feedback. This commit starts the development efforts for Haystack v2. v1.2.7 (2012-04-06) ------------------- - Bumped to v1.2.7! [Daniel Lindsley] - Solr: more informative logging when full_prepare fails during update. [Chris Adams] * Change the exception handler to record per-object failures * Log the precise object which failed in a manner which tools like Sentry can examine - Added ikks to AUTHORS. [Daniel Lindsley] - Fixed ``model_choices`` to use ``smart_unicode``. Thanks to ikks for the patch! [Daniel Lindsley] - Fixed compatibility with Django pre-1.4 trunk. * The MAX_SHOW_ALL_ALLOWED variable is no longer available, and hence causes an ImportError with Django versions higher 1.3. 
* The "list_max_show_all" attribute on the ChangeList object is used instead. * This patch maintains compatibility with Django 1.3 and lower by trying to import the MAX_SHOW_ALL_ALLOWED variable first. [Aram Dulyan] - Fixed a bug in pagination. Thanks to sgoll for the report! [Daniel Lindsley] - Added an example to the docs on what to put in ``INSTALLED_APPS``. Thanks to Dan Krol for the suggestion. [Daniel Lindsley] - Added .values() / .values_list() methods, for fetching less data. [Chris Adams] - Reduced the number of queries Haystack has to perform in many cases (pagination/facet_counts/spelling_suggestions). [Chris Adams] - Fixed compatibility with Django pre-1.4 trunk. * The MAX_SHOW_ALL_ALLOWED variable is no longer available, and hence causes an ImportError with Django versions higher 1.3. * The "list_max_show_all" attribute on the ChangeList object is used instead. * This patch maintains compatibility with Django 1.3 and lower by trying to import the MAX_SHOW_ALL_ALLOWED variable first. [Aram Dulyan] v1.2.6 (2011-12-09) ------------------- - I hate Python packaging. [Daniel Lindsley] - Bumped to v1.2.6! [Daniel Lindsley] - Made ``SearchIndex`` classes thread-safe. Thanks to craigds for the report & original patch. [Daniel Lindsley] - Added a long description for PyPI. [Daniel Lindsley] - Fixed errant ``self.log``. [Daniel Lindsley] Thanks to terryh for the report! - Started 1.2.6. [Daniel Lindsley] v1.2.5 (2011-09-14) ------------------- - Bumped to v1.2.5! [Daniel Lindsley] - Fixed a bug with index inheritance. [Daniel Lindsley] Fields would seem to not obey the MRO while method did. Thanks to ironfroggy for the report! - Fixed a long-time bug where the Whoosh backend didn't have a ``log`` attribute. [Daniel Lindsley] - Fixed a bug with Whoosh's edge n-gram support to be consistent with the implementation in the other engines. [Daniel Lindsley] - Added tswicegood to AUTHORS. 
[Daniel Lindsley] - Fixed the ``clear_index`` management command to respect the ``--site`` option. [Travis Swicegood] - Removed outdated warning about padding numeric fields. Thanks to mchaput for pointing this out! [Daniel Lindsley] - Added a silent failure option to prevent Haystack from suppressing some failures. [Daniel Lindsley] This option defaults to ``True`` for compatibility & to prevent cases where lost connections can break reindexes/searches. - Fixed the simple backend to not throw an exception when handed an ``SQ``. Thanks to diegobz for the report! [Daniel Lindsley] - Bumped version post-release. [Daniel Lindsley] - Whoosh now supports More Like This! Requires Whoosh 1.8.4. [Daniel Lindsley] v1.2.4 (2011-05-28) ------------------- - Bumped to v1.2.4! [Daniel Lindsley] - Fixed a bug where the old ``get_queryset`` wouldn't be used during ``update_index``. Thanks to Craig de Stigter & others for the report. [Daniel Lindsley] - Bumped to v1.2.3! [Daniel Lindsley] - Require Whoosh 1.8.3+. It's for your own good. [Daniel Lindsley] v1.2.2 (2011-05-19) ------------------- - Bumped to v1.2.2! [Daniel Lindsley] - Added multiprocessing support to ``update_index``! Thanks to CMGdigital for funding development of this feature. [Daniel Lindsley] - Fixed a bug where ``set`` couldn't be used with ``__in``. Thanks to Kronuz for the report! [Daniel Lindsley] - Added a ``DecimalField``. [Daniel Lindsley] v1.2.1 (2011-05-14) ------------------- - Bumped to v1.2.1. [Daniel Lindsley] - Fixed a typo in the autocomplete docs. Thanks to anderso for the catch! [Daniel Lindsley] - Fixed a backward-incompatible query syntax change Whoosh introduced between 1.6.1 & 1.6.2 that causes only one model to appear as though it is indexed. [Daniel Lindsley] v1.2.0 (2011-05-03) ------------------- - V1.2.0! [Daniel Lindsley] - Added ``request`` to the ``FacetedSearchView`` context. Thanks to dannercustommade for the report! 
[Daniel Lindsley] - Fixed the docs on enabling spelling suggestion support in Solr. [Daniel Lindsley] - Fixed a bug so that ``ValuesListQuerySet`` now works with the ``__in`` filter. Thanks to jcdyer for the report! [Daniel Lindsley] - Added the new ``SearchIndex.read_queryset`` bits. [Sam Cooke] - Changed ``update_index`` so that it warns you if your ``SearchIndex.get_queryset`` returns an unusable object. [Daniel Lindsley] - Removed Python 2.3 compat code & bumped requirements for the impending release. [Daniel Lindsley] - Added treyhunner to AUTHORS. [Daniel Lindsley] - Improved the way selected_facets are handled. [Chris Adams] * ``selected_facets`` may be provided multiple times. * Facet values are quoted to avoid backend confusion (i.e. `author:Joe Blow` is seen by Solr as `author:Joe AND Blow` rather than the expected `author:"Joe Blow"`) - Add test for Whoosh field boost. [Trey Hunner] - Enable field boosting with Whoosh backend. [Trey Hunner] - Fixed the Solr & Whoosh backends to use the correct ``site`` when processing results. Thanks to Madan Thangavelu for the original patch! [Daniel Lindsley] - Added lukeman to AUTHORS. [Daniel Lindsley] - Updating Solr download and installation instructions to reference version 1.4.1 as 1.3.x is no longer available. Fixes #341. [lukeman] - Revert "Shifted ``handle_registrations`` into ``models.py``." [Daniel Lindsley] This seems to be breaking for people, despite working here & passing tests. Back to the drawing board... This reverts commit 106758f88a9bc5ab7e505be62d385d876fbc52fe. - Shifted ``handle_registrations`` into ``models.py``. [Daniel Lindsley] For historical reasons, it was (wrongly) kept & run in ``__init__.py``. This should help fix many people's issues with it running too soon. - Pulled out ``EmptyResults`` for testing elsewhere. [Daniel Lindsley] - Fixed a bug where boolean filtering wouldn't work properly on Whoosh. Thanks to alexrobbins for pointing it out! 
[Daniel Lindsley] - Added link to 1.1 version of the docs. [Daniel Lindsley] - Whoosh 1.8.1 compatibility. [Daniel Lindsley] - Added TodasLasRecetas to "Who Uses". Thanks Javier! [Daniel Lindsley] - Added a new method to ``SearchQuerySet`` to allow you to specify a custom ``result_class`` to use in place of ``SearchResult``. Thanks to aaronvanderlip for getting me thinking about this! [Daniel Lindsley] - Added better autocomplete support to Haystack. [Daniel Lindsley] - Changed ``SearchForm`` to be more permissive of missing form data, especially when the form is unbound. Thanks to cleifer for pointing this out! [Daniel Lindsley] - Ensured that the primary key of the result is a string. Thanks to gremmie for pointing this out! [Daniel Lindsley] - Fixed a typo in the tutorial. Thanks to JavierLopezMunoz for pointing this out! [Daniel Lindsley] - Added appropriate warnings about ``HAYSTACK__PATH`` settings in the docs. [Daniel Lindsley] - Added some checks for badly-behaved backends. [Daniel Lindsley] - Ensure ``use_template`` can't be used with ``MultiValueField``. [Daniel Lindsley] - Added n-gram fields for auto-complete style searching. [Daniel Lindsley] - Added ``django-celery-haystack`` to the subapp docs. [Daniel Lindsley] - Fixed the the faceting docs to correctly link to narrowed facets. Thanks to daveumr for pointing that out! [Daniel Lindsley] - Updated docs to reflect the ``form_kwargs`` that can be used for customization. [Daniel Lindsley] - Whoosh backend now explicitly closes searchers in an attempt to use fewer file handles. [Daniel Lindsley] - Changed fields so that ``boost`` is now the parameter of choice over ``weight`` (though ``weight`` has been retained for backward compatibility). Thanks to many people for the report! [Daniel Lindsley] - Bumped revision. [Daniel Lindsley] v1.1 (2010-11-23) ----------------- - Bumped version to v1.1! [Daniel Lindsley] - The ``build_solr_schema`` command can now write directly to a file. 
Also includes tests for the new overrides. [Daniel Lindsley] - Haystack's reserved field names are now configurable. [Daniel Lindsley] - BACKWARD-INCOMPATIBLE: ``auto_query`` has changed so that only double quotes cause exact match searches. Thanks to craigds for the report! [Daniel Lindsley] - Added docs on handling content-type specific output in results. [Daniel Lindsley] - Added tests for ``content_type``. [Daniel Lindsley] - Added docs on boosting. [Daniel Lindsley] - Updated the ``searchfield_api`` docs. [Daniel Lindsley] - ``template_name`` can be a list of templates passed to ``loader.select_template``. Thanks to zifot for the suggestion. [Daniel Lindsley] - Moved handle_facet_parameters call into FacetField's __init__. [Travis Cline] - Updated the pysolr dependency docs & added a debugging note about boost support. [Daniel Lindsley] - Starting the beta. [Daniel Lindsley] - Fixed a bug with ``FacetedSearchForm`` where ``cleaned_data`` may not exist. Thanks to imageinary for the report! [Daniel Lindsley] - Added the ability to build epub versions of the docs. [Alfredo] - Clarified that the current supported version of Whoosh is the 1.1.1+ series. Thanks to glesica for the report & original patch! [Daniel Lindsley] - The SearchAdmin now correctly uses SEARCH_VAR instead of assuming things. [Rob Hudson] - Added the ability to "weight" individual fields to adjust their relevance. [David Sauve] - Fixed facet fieldname lookups to use the proper fieldname. [Daniel Lindsley] - Removed unneeded imports from the Solr backend. [Daniel Lindsley] - Further revamping of faceting. Each field type now has a faceted variant that's created either with ``faceted=True`` or manual initialization. [Daniel Lindsley] This should also make user-created field types possible, as many of the gross ``isinstance`` checks were removed. - Fixes SearchQuerySet not pickleable. Patch by oyiptong, tests by toastdriven. 
[oyiptong] - Added the ability to remove objects from the index that are no longer in the database to the ``update_index`` management command. [Daniel Lindsley] - Added a ``range`` filter type. Thanks to davisp & lukesneeringer for the suggestion! [Daniel Lindsley] Note that integer ranges are broken on the current Whoosh (1.1.1). However, date & character ranges seem to work fine. - Consistency. [Daniel Lindsley] - Ensured that multiple calls to ``count`` don't result in multiple queries. Thanks to Nagyman and others for the report! [Daniel Lindsley] - Ensure that when fetching the length of a result set that the whole index isn't consumed (especially on Whoosh & Xapian). [Daniel Lindsley] - Really fixed dict ordering bugs in SearchSite. [Travis Cline] - Changed how you query for facets and how how they are presented in the facet counts. Allows customization of facet field names in indexes. [Travis Cline] Lightly backward-incompatible (git only). - Made it easier to override ``SearchView/SearchForm`` behavior when no query is present. [Daniel Lindsley] No longer do you need to override both ``SearchForm`` & ``SearchView`` if you want to return all results. Use the built-in ``SearchView``, provide your own custom ``SearchForm`` subclass & override the ``no_query_found`` method per the docstring. - Don't assume that any pk castable to an integer should be an integer. [Carl Meyer] - Fetching a list of all fields now produces correct results regardless of dict-ordering. Thanks to carljm & veselosky for the report! [Daniel Lindsley] - Added notes about what is needed to make schema-building independent of dict-ordering. [Daniel Lindsley] - Sorted model order matters. [Daniel Lindsley] - Prevent Whoosh from erroring if the ``end_offset`` is less than or equal to 0. Thanks to zifot for the report! [Daniel Lindsley] - Removed insecure use of ``eval`` from the Whoosh backend. Thanks to SmileyChris for pointing this out. 
[Daniel Lindsley] - Disallow ``indexed=False`` on ``FacetFields``. Thanks to jefftriplett for the report! [Daniel Lindsley] - Added ``FacetField`` & changed the way facets are processed. [Daniel Lindsley] Facet data is no longer quietly duplicated just before it goes into the index. Instead, full fields are created (with all the standard data & methods) to contain the faceted information. This change is backward-compatible, but allows for better extension, not requiring data duplication into an unfaceted field and a little less magic. - EmptyQuerySet.facet_counts() won't hit the backend. [Chris Adams] This avoids an unnecessary extra backend query displaying the default faceted search form. - TextMate fail. [Daniel Lindsley] - Changed ``__name__`` to an attribute on ``SearchView`` to work with decorators. Thanks to trybik for the report! [Daniel Lindsley] - Changed some wording on the tutorial to indicate where the data template should go. Thanks for the suggestion Davepar! [Daniel Lindsley] - Merge branch 'whoosh-1.1' [Daniel Lindsley] - Final cleanup before merging Whoosh 1.1 branch! [Daniel Lindsley] - Final Whoosh 1.1.1 fixes. Waiting for an official release of Whoosh & hand testing, then this ought to be merge-able. [Daniel Lindsley] - Upgraded the Whoosh backend to 1.1. Still one remaining test failure and two errors. Waiting on mchaput's thoughts/patches. [Daniel Lindsley] - Mistakenly committed this change. This bug is not fixed. [Daniel Lindsley] - Better handling of attempts at loading backends when the various supporting libraries aren't installed. Thanks to traviscline for the report. [Daniel Lindsley] - Fixed random test failures from not running the Solr tests in awhile. [Daniel Lindsley] - Changed mlt test to use a set comparison to eliminate failures due to ordering differences. [Travis Cline] - Sped up Solr backend tests by moving away from RealTimeSearchIndex since it was adding objects to Solr when loading fixtures. 
[Travis Cline] - Automatically add ``suggestion`` to the context if ``HAYSTACK_INCLUDE_SPELLING`` is set. Thanks to notanumber for the suggestion! [Daniel Lindsley] - Added apollo13 to AUTHORS for the ``SearchForm.__init__`` cleanup. [Daniel Lindsley] - Use kwargs.pop instead of try/except. [Florian Apolloner] - Added Rob to AUTHORS for the admin cleanup. [Daniel Lindsley] - Fixed selection_note text by adding missing zero. [Rob Hudson] - Fixed full_result_count in admin search results. [Rob Hudson] - Fixed admin actions in admin search results. [Rob Hudson] - Added DevCheatSheet to "Who Uses". [Daniel Lindsley] - Added Christchurch Art Gallery to "Who Uses". [Daniel Lindsley] - Forgot to include ghostrocket as submitting a patch on the previous commit. [Daniel Lindsley] - Fixed a serious bug in the ``simple`` backend that would flip the object instance and class. [Daniel Lindsley] - Updated Whoosh to 0.3.18. [Daniel Lindsley] - Updated NASA's use of Haystack in "Who Uses". [Daniel Lindsley] - Changed how ``ModelSearchIndex`` introspects to accurately use ``IntegerField`` instead of ``FloatField`` as it was using. [Daniel Lindsley] - Added CongresoVisible to Who Uses. [Daniel Lindsley] - Added a test to verify a previous change to the ``simple`` backend. [Daniel Lindsley] - Fixed the new admin bits to not explode on Django 1.1. [Daniel Lindsley] - Added ``SearchModelAdmin``, which enables Haystack-based search within the admin. [Daniel Lindsley] - Fixed a bug when not specifying a ``limit`` when using the ``more_like_this`` template tag. Thanks to symroe for the original patch. [Daniel Lindsley] - Fixed the error messages that occur when looking up attributes on a model. Thanks to acdha for the patch. [Daniel Lindsley] - Added pagination to the example search template in the docs so it's clear that it is supported. [Daniel Lindsley] - Fixed copy-paste foul in ``Installing Search Engines`` docs. 
[Daniel Lindsley] - Fixed the ``simple`` backend to return ``SearchResult`` instances, not just bare model instances. Thanks to Agos for the report. [Daniel Lindsley] - Fixed the ``clear_index`` management command to respect ``--verbosity``. Thanks to kylemacfarlane for the report. [Daniel Lindsley] - Altered the ``simple`` backend to only search textual fields. This makes the backend work consistently across all databases and is likely the desired behavior anyhow. Thanks to kylemacfarlane for the report. [Daniel Lindsley] - Fixed a bug in the ``Highlighter`` which would double-highlight HTML tags. Thanks to EmilStenstrom for the original patch. [Daniel Lindsley] - Updated management command docs to mention all options that are accepted. [Daniel Lindsley] - Altered the Whoosh backend to correctly clear the index when using the ``RAMStorage`` backend. Thanks to kylemacfarlane for the initial patch. [Daniel Lindsley] - Changed ``SearchView`` to allow more control over how many results are shown per page. Thanks to simonw for the suggestion. [Daniel Lindsley] - Ignore ``.pyo`` files when listing out the backend options. Thanks to kylemacfarlane for the report. [Daniel Lindsley] - Added CustomMade to Who Uses. [Daniel Lindsley] - Moved a backend import to allow changing the backend Haystack uses on the fly. [Daniel Lindsley] Useful for testing. - Added more debugging information to the docs. [Daniel Lindsley] - Added DeliverGood.org to the "Who Uses" docs. [Daniel Lindsley] - Added an settings override on ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` as a possible performance optimization. [Daniel Lindsley] - Added the ability to pickle ``SearchResult`` objects. Thanks to dedsm for the original patch. [Daniel Lindsley] - Added docs and fixed tests on the backend loading portions. Thanks to kylemacfarlane for the report. [Daniel Lindsley] - Fixed bug with ``build_solr_schema`` where ``stored=False`` would be ignored. Thanks to johnthedebs for the report. 
[Daniel Lindsley] - Added debugging notes for Solr. Thanks to smccully for reporting this. [Daniel Lindsley] - Fixed several errors in the ``simple`` backend. Thanks to notanumber for the original patch. [Daniel Lindsley] - Documentation fixes for Xapian. Thanks to notanumber for the edits! [Daniel Lindsley] - Fixed a typo in the tutorial. Thanks to cmbeelby for pointing this out. [Daniel Lindsley] - Fixed an error in the tutorial. Thanks to bencc for pointing this out. [Daniel Lindsley] - Added a warning to the docs that ``SearchQuerySet.raw_search`` does not chain. Thanks to jacobstr for the report. [Daniel Lindsley] - Fixed an error in the documentation on providing fields for faceting. Thanks to ghostmob for the report. [Daniel Lindsley] - Fixed a bug where a field that's both nullable & faceted would error if no data was provided. Thanks to LarryEitel for the report. [Daniel Lindsley] - Fixed a regression where the built-in Haystack fields would no longer facet correctly. Thanks to traviscline for the report. [Daniel Lindsley] - Fixed last code snippet on the ``SearchIndex.prepare_FOO`` docs. Thanks to sk1p for pointing that out. [Daniel Lindsley] - Fixed a bug where the schema could be built improperly if similar fieldnames had different options. [Daniel Lindsley] - Added to existing tests to ensure that multiple faceted fields are included in the index. [Daniel Lindsley] - Finally added a README. [Daniel Lindsley] - Added a note about versions of the docs. [Daniel Lindsley] - Go back to the default Sphinx theme. The custom Haystack theme is too much work and too little benefit. [Daniel Lindsley] - Added a note in the tutorial about building the schema when using Solr. Thanks to trey0 for the report! [Daniel Lindsley] - Fixed a bug where using ``SearchQuerySet.models()`` on an unregistered model would be silently ignored. 
[Daniel Lindsley] It is still silently ignored, but now emits a warning informing the user of why they may receive more results back than they expect. - Added notes about the ``simple`` backend in the docs. Thanks to notanumber for catching the omission. [Daniel Lindsley] - Removed erroneous old docs about Lucene support, which never landed. [Daniel Lindsley] - Merge branch 'master' of github.com:toastdriven/django-haystack. [Daniel Lindsley] - Fixed typo in the tutorial. Thanks fxdgear for pointing that out! [Daniel Lindsley] - Fixed a bug related to Unicode data in conjunction with the ``dummy`` backend. Thanks to kylemacfarlane for the report! [Daniel Lindsley] - Added Forkinit to Who Uses. [Daniel Lindsley] - Added Rampframe to Who Uses. [Daniel Lindsley] - Added other apps documentation for Haystack-related apps. [Daniel Lindsley] - Unified the way ``DEFAULT_OPERATOR`` is setup. [Daniel Lindsley] - You can now override ``ITERATOR_LOAD_PER_QUERY`` with a setting if you're consuming big chunks of a ``SearchQuerySet``. Thanks to kylemacfarlane for the report. [Daniel Lindsley] - Moved the preparation of faceting data to a ``SearchIndex.full_prepare()`` method for easier overriding. Thanks to xav for the suggestion! [Daniel Lindsley] - The ``more_like_this`` tag now silently fails if things go south. Thanks to piquadrat for the patch! [Daniel Lindsley] - Added a fleshed out ``simple_backend`` for basic usage + testing. [David Sauve] - ``SearchView.build_form()`` now accepts a dict to pass along to the form. Thanks to traviscline for the patch! [Daniel Lindsley] - Fixed the ``setup.py`` to include ``haystack.utils`` and added to the ``MANIFEST.in``. Thanks to jezdez for the patch! [Daniel Lindsley] - Fixed date faceting in Solr. [Daniel Lindsley] No more OOMs and very fast over large data sets. - Added the ``search_view_factory`` function for thread-safe use of ``SearchView``. 
[Daniel Lindsley] - Added more to the docs about the ``SearchQuerySet.narrow()`` method to describe when/why to use it. [Daniel Lindsley] - Fixed Whoosh tests. [Daniel Lindsley] Somewhere, a reference to the old index was hanging around causing incorrect failures. - The Whoosh backend now uses the ``AsyncWriter``, which ought to provide better performance. Requires Whoosh 0.3.15 or greater. [Daniel Lindsley] - Added a way to pull the correct fieldname, regardless if it's been overridden or not. [Daniel Lindsley] - Added docs about adding new fields. [Daniel Lindsley] - Removed a painful ``isinstance`` check which should make non-standard usages easier. [Daniel Lindsley] - Updated docs regarding reserved field names in Haystack. [Daniel Lindsley] - Pushed some of the new faceting bits down in the implementation. [Daniel Lindsley] - Removed unnecessary fields from the Solr schema template. [Daniel Lindsley] - Revamped how faceting is done within Haystack to make it easier to work with. [Daniel Lindsley] - Add more sites to Who Uses. [Daniel Lindsley] - Fixed a bug in ``ModelSearchIndex`` where the ``index_fieldname`` would not get set. Also added a way to override it in a general fashion. Thanks to traviscline for the patch! [Daniel Lindsley] - Backend API standardization. Thanks to batiste for the report! [Daniel Lindsley] - Removed a method that was supposed to have been removed before 1.0. Oops. [Daniel Lindsley] - Added the ability to override field names within the index. Thanks to traviscline for the suggestion and original patch! [Daniel Lindsley] - Corrected the AUTHORS because slai actually provided the patch. Sorry about that. [Daniel Lindsley] - Refined the internals of ``ModelSearchIndex`` to be a little more flexible. Thanks to traviscline for the patch! [Daniel Lindsley] - The Whoosh backend now supports ``RamStorage`` for use with testing or other non-permanent indexes. 
[Daniel Lindsley] - Fixed a bug in the ``Highlighter`` involving repetition and regular expressions. Thanks to alanzoppa for the original patch! [Daniel Lindsley] - Fixed a bug in the Whoosh backend when a ``MultiValueField`` is empty. Thanks to alanwj for the original patch! [Daniel Lindsley] - All dynamic imports now use ``importlib``. Thanks to bfirsh for the original patch mentioning this. [Daniel Lindsley] A backported version of ``importlib`` is included for compatibility with Django 1.0. - Altered ``EmptySearchQuerySet`` so it's usable from templates. Thanks to bfirsh for the patch! [Daniel Lindsley] - Added tests to ensure a Whoosh regression is no longer present. [Daniel Lindsley] - Fixed a bug in Whoosh where using just ``.models()`` would create an invalid query. Thanks to ricobl for the original patch. [Daniel Lindsley] - Forms with initial data now display it when used with SearchView. Thanks to osirius for the original patch. [Daniel Lindsley] - App order is now consistent with INSTALLED_APPS when running ``update_index``. [Daniel Lindsley] - Updated docs to reflect the recommended way to do imports in when defining ``SearchIndex`` classes. [Daniel Lindsley] This is not my preferred style but reduces the import errors some people experience. - Fixed omission of Xapian in the settings docs. Thanks to flebel for pointing this out. [Daniel Lindsley] - Little bits of cleanup related to testing. [Daniel Lindsley] - Fixed an error in the docs related to pre-rendering data. [Daniel Lindsley] - Added Pegasus News to Who Uses. [Daniel Lindsley] - Corrected an import in forms for consistency. Thanks to bkonkle for pointing this out. [Daniel Lindsley] - Fixed bug where passing a customized ``site`` would not make it down through the whole stack. Thanks to Peter Bengtsson for the report and original patch. [Daniel Lindsley] - Bumped copyright years. [Daniel Lindsley] - Changed Whoosh backend so most imports will raise the correct exception. 
Thanks to shabda for the suggestion. [Daniel Lindsley] - Refactored Solr's tests to minimize reindexes. Runs ~50% faster. [Daniel Lindsley] - Fixed a couple potential circular imports. [Daniel Lindsley] - The same field can now have multiple query facets. Thanks to bfirsh for the original patch. [Daniel Lindsley] - Added schema for testing Solr. [Daniel Lindsley] - Fixed a string interpolation bug when adding an invalid data facet. Thanks to simonw for the original patch. [Daniel Lindsley] - Fixed the default highlighter to give slightly better results, especially with short strings. Thanks to RobertGawron for the original patch. [Daniel Lindsley] - Changed the ``rebuild_index`` command so it can take all options that can be passed to either ``clear_index`` or ``update_index``. Thanks to brosner for suggesting this. [Daniel Lindsley] - Added ``--noinput`` flag to ``clear_index``. Thanks to aljosa for the suggestion. [Daniel Lindsley] - Updated the example in the template to be a little more real-world and user friendly. Thanks to j0hnsmith for pointing this out. [Daniel Lindsley] - Fixed a bug with the Whoosh backend where scores weren't getting populated correctly. Thanks to horribtastic for the report. [Daniel Lindsley] - Changed ``EmptySearchQuerySet`` so it returns an empty list when slicing instead of mistakenly running queries. Thanks to askfor for reporting this bug. [Daniel Lindsley] - Switched ``SearchView`` & ``FacetedSearchView`` to use ``EmptySearchQuerySet`` (instead of a regular list) when there are no results. Thanks to acdha for the original patch. [Daniel Lindsley] - Added RedditGifts to "Who Uses". [Daniel Lindsley] - Added Winding Road to "Who Uses". [Daniel Lindsley] - Added ryszard's full name to AUTHORS. [Daniel Lindsley] - Added initialization bits to part of the Solr test suite. Thanks to notanumber for pointing this out. [Daniel Lindsley] - Started the 1.1-alpha work. Apologies for not doing this sooner. 
[Daniel Lindsley] - Added an advanced setting for disabling Haystack's initialization in the event of a conflict with other apps. [Daniel Lindsley] - Altered ``SearchForm`` to use ``.is_valid()`` instead of ``.clean()``, which is a more idiomatic/correct usage. Thanks to askfor for the suggestion. [Daniel Lindsley] - Added MANIFEST to ignore list. [Daniel Lindsley] - Fixed Django 1.0 compatibility when using the Solr backend. [Daniel Lindsley] - Marked Haystack as 1.0 final. [Daniel Lindsley] - Incorrect test result from changing the documented way the ``highlight`` template tag gets called. [Daniel Lindsley] - Updated the example in faceting documentation to provide better results and explanation on the reasoning. [Daniel Lindsley] - Added further documentation about ``SearchIndex``/``RealTimeSearchIndex``. [Daniel Lindsley] - Added docs about `SearchQuerySet.highlight`. [toastdriven] - Added further docs on `RealTimeSearchIndex`. [toastdriven] - Added documentation on the ``RealTimeSearchIndex`` class. [toastdriven] - Fixed the documentation for the arguments on the `highlight` tag. Thanks to lucalenardi for pointing this out. [Daniel Lindsley] - Fixed tutorial to mention where the `NoteSearchIndex` should be placed. Thanks to bkeating for pointing this out. [Daniel Lindsley] - Marked Haystack as 1.0.0 release candidate 1. [Daniel Lindsley] - Haystack now requires Whoosh 0.3.5. [Daniel Lindsley] - Last minute documentation cleanup. [Daniel Lindsley] - Added documentation about the management commands that come with Haystack. [Daniel Lindsley] - Added docs on the template tags included with Haystack. [Daniel Lindsley] - Added docs on highlighting. [Daniel Lindsley] - Removed some unneeded legacy code that was causing conflicts when Haystack was used with apps that load all models (such as `django- cms2`, `localemiddleware` or `django-transmeta`). [Daniel Lindsley] - Removed old code from the `update_index` command. 
[Daniel Lindsley] - Altered spelling suggestion test to something a little more consistent. [Daniel Lindsley] - Added tests for slicing the end of a `RelatedSearchQuerySet`. [Daniel Lindsley] - Fixed case where `SearchQuerySet.more_like_this` would fail when using deferred Models. Thanks to Alex Gaynor for the original patch. [Daniel Lindsley] - Added default logging bits to prevent "No handlers found" message. [Daniel Lindsley] - BACKWARD-INCOMPATIBLE: Renamed `reindex` management command to `update_index`, renamed `clear_search_index` management command to `clear_index` and added a `rebuild_index` command to both clear & reindex. [Daniel Lindsley] - BACKWARD-INCOMPATIBLE: `SearchIndex` no longer hooks up `post_save/post_delete` signals for the model it's registered with. [Daniel Lindsley] If you use `SearchIndex`, you will have to manually cron up a `reindex` (soon to become `update_index`) management command to periodically refresh the data in your index. If you were relying on the old behavior, please use `RealTimeSearchIndex` instead, which does hook up those signals. - Ensured that, if a `MultiValueField` is marked as `indexed=False` in Whoosh, it ought not to post-process the field. [Daniel Lindsley] - Ensured data going into the indexes round-trips properly. Fixed `DateField`/`DateTimeField` handling for all backends and `MultiValueField` handling in Whoosh. [Daniel Lindsley] - Added a customizable `highlight` template tag plus an underlying `Highlighter` implementation. [Daniel Lindsley] - Added more documentation about using custom `SearchIndex.prepare_FOO` methods. [Daniel Lindsley] - With Whoosh 0.3.5+, the number of open files is greatly reduced. [Daniel Lindsley] - Corrected example in docs about `RelatedSearchQuerySet`. Thanks to askfor for pointing this out. [Daniel Lindsley] - Altered `SearchResult` objects to fail gracefully when the model/object can't be found. Thanks to akrito for the report. 
[Daniel Lindsley] - Fixed a bug where `auto_query` would fail to escape strings that pulled out for exact matching. Thanks to jefftriplett for the report. [Daniel Lindsley] - Added Brick Design to Who Uses. [Daniel Lindsley] - Updated backend support docs slightly. [Daniel Lindsley] - Added the ability to combine `SearchQuerySet`s via `&` or `|`. Thanks to reesefrancis for the suggestion. [Daniel Lindsley] - Revised the most of the tutorial. [Daniel Lindsley] - Better documented how user-provided data should be sanitized. [Daniel Lindsley] - Fleshed out the `SearchField` documentation. [Daniel Lindsley] - Fixed formatting on ``SearchField`` documentation. [Daniel Lindsley] - Added basic ``SearchField`` documentation. [Daniel Lindsley] More information about the kwargs and usage will be eventually needed. - Bumped the `ulimit` so Whoosh tests pass consistently on Mac OS X. [Daniel Lindsley] - Fixed the `default` kwarg in `SearchField` (and subclasses) to work properly from a user's perspective. [Daniel Lindsley] - BACKWARD-INCOMPATIBLE: Fixed ``raw_search`` to cooperate when paginating/slicing as well as many other conditions. [Daniel Lindsley] This no longer immediately runs the query, nor pokes at any internals. It also now takes into account other details, such as sorting & faceting. - Fixed a bug in the Whoosh backend where slicing before doing a hit count could cause strange results when paginating. Thanks to kylemacfarlane for the original patch. [Daniel Lindsley] - The Whoosh tests now deal with the same data set as the Solr tests and cover various aspects better. [Daniel Lindsley] - Started to pull out the real-time, signal-based updates out of the main `SearchIndex` class. Backward compatible for now. [Daniel Lindsley] - Fixed docs to include `utils` documentation. [Daniel Lindsley] - Updated instructions for installing `pysolr`. Thanks to sboisen for pointing this out. [Daniel Lindsley] - Added acdha to AUTHORS for previous commit. 
[Daniel Lindsley] - Added exception handling to the Solr Backend to silently fail/log when Solr is unavailable. Thanks to acdha for the original patch. [Daniel Lindsley] - The `more_like_this` tag is now tested within the suite. Also has lots of cleanup for the other Solr tests. [Daniel Lindsley] - On both the Solr & Whoosh backends, don't do an update if there's nothing being updated. [Daniel Lindsley] - Moved Haystack's internal fields out of the backends and into `SearchIndex.prepare`. [Daniel Lindsley] This is both somewhat more DRY as well as a step toward Haystack being useful to non-Django projects. - Fixed a bug in the `build_schema` where fields that aren't supposed to be indexed are still getting post-processed by Solr. Thanks to Jonathan Slenders for the report. [Daniel Lindsley] - Added HUGE to Who Uses. [Daniel Lindsley] - Fixed bug in Whoosh where it would always generate spelling suggestions off the full query even when given a different query string to check against. [Daniel Lindsley] - Simplified the SQ object and removed a limitation on kwargs/field names that could be passed in. Thanks to traviscline for the patch. [Daniel Lindsley] - Documentation on `should_update` fixed to match the new signature. Thanks to kylemacfarlane for pointing this out. [Daniel Lindsley] - Fixed missing words in Best Practices documentation. Thanks to frankwiles for the original patch. [Daniel Lindsley] - The `update_object` method now passes along kwargs as needed to the `should_update` method. Thanks to askfor for the suggestion. [Daniel Lindsley] - Updated docs about the removal of the Whoosh fork. [Daniel Lindsley] - Removed extraneous `BadSearchIndex3` from test suite. Thanks notanumber! [Daniel Lindsley] - We actually want `repr`, not `str`. [Daniel Lindsley] - Pushed the `model_attr` check lower down into the `SearchField`s and make it occur later, so that exceptions come at a point where Django can better deal with them. 
[Daniel Lindsley] - Fixed attempting to access an invalid `model_attr`. Thanks to notanumber for the original patch. [Daniel Lindsley] - Added SQ objects (replacing the QueryFilter object) as the means to generate queries/query fragments. Thanks to traviscline for all the hard work. [Daniel Lindsley] The SQ object is similar to Django's Q object and allows for arbitrarily complex queries. Only backward incompatible if you were relying on the SearchQuery/QueryFilter APIs. - Reformatted debugging docs a bit. [Daniel Lindsley] - Added debugging information about the Whoosh lock error. [Daniel Lindsley] - Brought the TODO up to date. [Daniel Lindsley] - Added a warning to the documentation about how `__startswith` may not always provide the expected results. Thanks to codysoyland for pointing this out. [Daniel Lindsley] - Added debugging documentation, with more examples coming in the future. [Daniel Lindsley] - Added a new `basic_search` view as a both a working example of how to write traditional views and as a thread-safe view, which the class- based ones may/may not be. [Daniel Lindsley] - Fixed sample template in the documentation. Thanks to lemonad for pointing this out. [Daniel Lindsley] - Updated documentation to include a couple more Sphinx directives. Index is now more useful. [Daniel Lindsley] - Made links more obvious in documentation. [Daniel Lindsley] - Added an `example_project` demonstrating how a sample project might be setup. [Daniel Lindsley] - Fixed `load_backend` to use the argument passed instead of always the `settings.HAYSTACK_SEARCH_ENGINE`. Thanks to newgene for the report. [Daniel Lindsley] - Regression where sometimes `narrow_queries` got juggled into a list when it should be a set everywhere. Thanks tcline & ericholscher for the report. [Daniel Lindsley] - Updated the Whoosh backend's version requirement to reflect the fully working version of Whoosh. 
[Daniel Lindsley] - With the latest SVN version of Whoosh (r344), `SearchQuerySet()` now works properly in Whoosh. [Daniel Lindsley] - Added a `FacetedModelSearchForm`. Thanks to mcroydon for the original patch. [Daniel Lindsley] - Added translation capabilities to the `SearchForm` variants. Thanks to hejsan for pointing this out. [Daniel Lindsley] - Added AllForLocal to Who Uses. [Daniel Lindsley] - The underlying caching has been fixed so it no longer has to fill the entire cache before it to ensure consistency. [Daniel Lindsley] This results in significantly faster slicing and reduced memory usage. The test suite is more complete and ensures this functionality better. This also removes `load_all_queryset` from the main `SearchQuerySet` implementation. If you were relying on this behavior, you should use `RelatedSearchQuerySet` instead. - Log search queries with `DEBUG = True` for debugging purposes, similar to what Django does. [Daniel Lindsley] - Updated LJ's Who Uses information. [Daniel Lindsley] - Added Sunlight Labs & NASA to the Who Uses list. [Daniel Lindsley] - Added Eldarion to the Who Uses list. [Daniel Lindsley] - When more of the cache is populated, provide a more accurate `len()` of the `SearchQuerySet`. This ought to only affect advanced usages, like excluding previously-registered models or `load_all_queryset`. [Daniel Lindsley] - Fixed a bug where `SearchQuerySet`s longer than `REPR_OUTPUT_SIZE` wouldn't include a note about truncation when `__repr__` is called. [Daniel Lindsley] - Added the ability to choose which site is used when reindexing. Thanks to SmileyChris for pointing this out and the original patch. [Daniel Lindsley] - Fixed the lack of a `__unicode__` method on `SearchResult` objects. Thanks to mint_xian for pointing this out. [Daniel Lindsley] - Typo'd the setup.py changes. Thanks to jlilly for catching that. [Daniel Lindsley] - Converted all query strings to Unicode for Whoosh. Thanks to simonw108 for pointing this out. 
[Daniel Lindsley] - Added template tags to `setup.py`. Thanks to Bogdan for pointing this out. [Daniel Lindsley] - Added two more tests to the Whoosh backend, just to make sure. [Daniel Lindsley] - Corrected the way Whoosh handles `order_by`. Thanks to Rowan for pointing this out. [Daniel Lindsley] - For the Whoosh backend, ensure the directory is writable by the current user to try to prevent failed writes. [Daniel Lindsley] - Added a better label to the main search form field. [Daniel Lindsley] - Bringing the Whoosh backend up to version 0.3.0b14. This version of Whoosh has better query parsing, faster indexing and, combined with these changes, should cause fewer disruptions when used in a multiprocess/multithreaded environment. [Daniel Lindsley] - Added optional argument to `spelling_suggestion` that lets you provide a different query than the one built by the SearchQuerySet. [Daniel Lindsley] Useful for passing along a raw user-provided query, especially when there is a lot of post-processing done. - SearchResults now obey the type of data chosen in their corresponding field in the SearchIndex if present. Thanks to evgenius for the original report. [Daniel Lindsley] - Fixed a bug in the Solr backend where submitting an empty string to search returned an ancient and incorrect datastructure. Thanks kapa77 for the report. [Daniel Lindsley] - Fixed a bug where the cache would never properly fill due to the number of results returned being lower than the hit count. This could happen when there were results excluded due to being in the index but the model NOT being registered in the `SearchSite`. Thanks akrito and tcline for the report. [Daniel Lindsley] - Altered the docs to look more like the main site. [Daniel Lindsley] - Added a (short) list of who uses Haystack. Would love to have more on this list. [Daniel Lindsley] - Fixed docs on preparing data. Thanks fud. [Daniel Lindsley] - Added the `ModelSearchIndex` class for easier `SearchIndex` generation. 
[Daniel Lindsley] - Added a note about using possibly unsafe data with `filter/exclude`. Thanks to ryszard for pointing this out. [Daniel Lindsley] - Standardized the API on `date_facet`. Thanks to notanumber for the original patch. [Daniel Lindsley] - Moved constructing the schema down to the `SearchBackend` level. This allows more flexibility when creating a schema. [Daniel Lindsley] - Fixed a bug where a hyphen provided to `auto_query` could break the query string. Thanks to ddanier for the report. [Daniel Lindsley] - BACKWARD INCOMPATIBLE - For consistency, `get_query_set` has been renamed to `get_queryset` on `SearchIndex` classes. [Daniel Lindsley] A simple search & replace to remove the underscore should be all that is needed. - Missed two bits while updating the documentation for the Xapian backend. [Daniel Lindsley] - Updated documentation to add the Xapian backend information. A big thanks to notanumber for all his hard work on the Xapian backend. [Daniel Lindsley] - Added `EmptySearchQuerySet`. Thanks to askfor for the suggestion! [Daniel Lindsley] - Added "Best Practices" documentation. [Daniel Lindsley] - Added documentation about the `HAYSTACK_SITECONF` setting. [Daniel Lindsley] - Fixed erroneous documentation on Xapian not supporting boost. Thanks notanumber! [Daniel Lindsley] - BACKWARD INCOMPATIBLE - The `haystack.autodiscover()` and other site modifications now get their own configuration file and should no longer be placed in the `ROOT_URLCONF`. Thanks to SmileyChris for the original patch and patrys for further feedback. [Daniel Lindsley] - Added `verbose_name_plural` to the `SearchResult` object. [Daniel Lindsley] - Added a warning about ordering by integers with the Whoosh backend. [Daniel Lindsley] - Added a note about ordering and accented characters. [Daniel Lindsley] - Updated the `more_like_this` tag to allow for narrowing the models returned by the tag. [Daniel Lindsley] - Fixed `null=True` for `IntegerField` and `FloatField`. 
Thanks to ryszard for the report and original patch. [Daniel Lindsley] - Reverted aabdc9d4b98edc4735ed0c8b22aa09796c0a29ab as it would cause mod_wsgi environments to fail in conjunction with the admin on Django 1.1. [Daniel Lindsley] - Added the start of a glossary of terminology. [Daniel Lindsley] - Various documentation fixes. Thanks to sk1p & notanumber. [Daniel Lindsley] - The `haystack.autodiscover()` and other site modifications may now be placed in ANY URLconf, not just the `ROOT_URLCONF`. Thanks to SmileyChris for the original patch. [Daniel Lindsley] - Fixed invalid/empty pages in the SearchView. Thanks to joep and SmileyChris for patches. [Daniel Lindsley] - Added a note and an exception about consistent fieldnames for the document field across all `SearchIndex` classes. Thanks sk1p_! [Daniel Lindsley] - Possible thread-safety fix related to registration handling. [Daniel Lindsley] - BACKWARD INCOMPATIBLE - The 'boost' method no longer takes kwargs. This makes boost a little more useful by allowing advanced terms. [Daniel Lindsley] To migrate code, convert multiple kwargs into separate 'boost' calls, quote what was the key and change the '=' to a ','. - Updated documentation to match behavioral changes to MLT. [Daniel Lindsley] - Fixed a serious bug in MLT on Solr. Internals changed a bit and now things work correctly. [Daniel Lindsley] - Removed erroneous 'zip_safe' from setup.py. Thanks ephelon. [Daniel Lindsley] - Added `null=True` to fields, allowing you to ignore/skip a field when indexing. Thanks to Kevin for the original patch. [Daniel Lindsley] - Fixed a standing test failure. The dummy setup can't do `load_all` due to mocking. [Daniel Lindsley] - Added initial `additional_query` to MLT to allow for narrowing results. [Daniel Lindsley] - Fixed nasty bug where results would get duplicated due to cached results. [Daniel Lindsley] - Altered `ITERATOR_LOAD_PER_QUERY` from 20 to 10. 
[Daniel Lindsley] - Corrected tutorial when dealing with fields that have `use_template=True`. [Daniel Lindsley] - Updated documentation to reflect basic Solr setup. [Daniel Lindsley] - Fix documentation on grabbing Whoosh and on the 'load_all' parameter for SearchForms. [Daniel Lindsley] - Fixed bug where the '__in' filter wouldn't work with phrases or data types other than one-word string/integer. [Daniel Lindsley] - Fixed bug so that the 'load_all' option in 'SearchView' now actually does what it says it should. How embarrassing... [Daniel Lindsley] - Added ability to specify custom QuerySets for loading records via 'load_all'/'load_all_queryset'. [Daniel Lindsley] - Fixed a bug where results from non-registered models could appear in the results. [Daniel Lindsley] - BACKWARD INCOMPATIBLE - Changed 'module_name' to 'model_name' throughout Haystack related to SearchResult objects. Only incompatible if you were relying on this attribute. [Daniel Lindsley] - Added the ability to fetch additional and stored fields from a SearchResult as well as documentation on the SearchResult itself. [Daniel Lindsley] - Added the ability to look through relations in SearchIndexes via '__'. [Daniel Lindsley] - Added note about the 'text' fieldname convention. [Daniel Lindsley] - Added an 'update_object' and 'remove_object' to the SearchSite objects as a shortcut. [Daniel Lindsley] - Recover gracefully from queries Whoosh judges to be invalid. [Daniel Lindsley] - Missed test from previous commit. [Daniel Lindsley] - Added stemming support to Whoosh. [Daniel Lindsley] - Removed the commented version. [Daniel Lindsley] - Django 1.0.X compatibility fix for the reindex command. [Daniel Lindsley] - Reindexes should now consume a lot less RAM. [Daniel Lindsley] Evidently, when you run a ton of queries touching virtually everything in your DB, you need to clean out the "logged" queries from the connection. Sad but true. 
- Altered `SearchBackend.remove` and `SearchBackend.get_identifier` to accept an object or a string identifier (in the event the object is no longer available). [Daniel Lindsley] This is useful in an environment where you no longer have the original object on hand and know what it is you wish to delete. - Added a simple (read: ghetto) way to run the test suite without having to mess with settings. [Daniel Lindsley] - Added a setting `HAYSTACK_BATCH_SIZE` to control how many objects are processed at once when running a reindex. [Daniel Lindsley] - Fixed import that was issuing a warning. [Daniel Lindsley] - Further tests to make sure `unregister` works appropriately as well, just to be paranoid. [Daniel Lindsley] - Fixed a bizarre bug where backends may see a different site object than the rest of the application code. THIS REQUIRES SEARCH & REPLACING ALL INSTANCES OF `from haystack.sites import site` TO `from haystack import site`. [Daniel Lindsley] No changes needed if you've been using `haystack.autodiscover()`. - Pushed save/delete signal registration down to the SearchIndex level. [Daniel Lindsley] This should make it easier to alter how individual indexes are setup, allowing you to queue updates, prevent deletions, etc. The internal API changed slightly. - Created a default 'clean' implementation, as the first three (and soon fourth) backends all use identical code. [Daniel Lindsley] - Updated tests to match new 'model_choices'. [Daniel Lindsley] - Added timeout support to Solr. [Daniel Lindsley] - Capitalize the Models in the model_choices. [Daniel Lindsley] - Removed unnecessary import. [Daniel Lindsley] - No longer need to watch for DEBUG in the 'haystack_info' command. [Daniel Lindsley] - Fixed bug in Whoosh backend when spelling suggestions are disabled. [Daniel Lindsley] - Added a "clear_search_index" management command. [Daniel Lindsley] - Removed comments as pysolr now supports timeouts and the other comment no longer applies. 
[Daniel Lindsley] - Removed Solr-flavored schema bits. [Daniel Lindsley] Still need to work out a better way to handle user created fields that don't fit neatly into subclassing one of the core Field types. - Moved informational messages to a management command to behave better when using dumpdata or wsgi. [Daniel Lindsley] - Changed some Solr-specific field names. Requires a reindex. [Daniel Lindsley] - Typo'd docstring. [Daniel Lindsley] - Removed empty test file from spelling testing. [Daniel Lindsley] - Documentation for getting spelling support working on Solr. [Daniel Lindsley] - Initial spelling support added. [Daniel Lindsley] - Added a 'more_like_this' template tag. [Daniel Lindsley] - Removed an unnecessary 'run'. This caused MLT (and potentially 'raw_search') to fail by overwriting the results found. [Daniel Lindsley] - Added Whoosh failure. Needs inspecting. [Daniel Lindsley] - Finally added views/forms documentation. A touch rough still. [Daniel Lindsley] - Fixed a bug in FacetedSearchView where a SearchQuerySet method could be called on an empty list instead. [Daniel Lindsley] - More faceting documentation. [Daniel Lindsley] - Started faceting documentation. [Daniel Lindsley] - Updated docs to finally include details about faceting. [Daniel Lindsley] - Empty or one character searches in Whoosh returned the wrong data structure. Thanks for catching this, silviogutierrez! [Daniel Lindsley] - Added scoring to Whoosh now that 0.1.20+ support it. [Daniel Lindsley] - Fixed a bug in the Solr tests due to recent changes in pysolr. [Daniel Lindsley] - Added documentation on the 'narrow' method. [Daniel Lindsley] - Added additional keyword arguments on raw_search. [Daniel Lindsley] - Added 'narrow' support in Whoosh. [Daniel Lindsley] - Fixed Whoosh backend's handling of pre-1900 dates. Thanks JoeGermuska! [Daniel Lindsley] - Backed out the Whoosh quoted dates patch. 
[Daniel Lindsley] Something still seems amiss in the Whoosh query parser, as ranges and dates together don't seem to get parsed together properly. - Added a small requirements section to the docs. [Daniel Lindsley] - Added notes about enabling the MoreLikeThisHandler within Solr. [Daniel Lindsley] - Revised how tests are done so each backend now gets its own test app. [Daniel Lindsley] All tests pass once again. - Added 'startswith' filter. [Daniel Lindsley] - Fixed the __repr__ method on QueryFilters. Thanks JoeGermuska for the original patch! [Daniel Lindsley] - BACKWARDS INCOMPATIBLE - Both the Solr & Whoosh backends now provide native Python types back in SearchResults. [Daniel Lindsley] This also allows Whoosh to use native types better from the 'SearchQuerySet' API itself. This unfortunately will also require all Whoosh users to reindex, as the way some data (specifically datetimes/dates but applicable to others) is stored in the index. - SearchIndexes now support inheritance. Thanks smulloni! [Daniel Lindsley] - Added FacetedSearchForm to make handling facets easier. [Daniel Lindsley] - Heavily refactored the SearchView to take advantage of being a class. [Daniel Lindsley] It should now be much easier to override bits without having to copy-paste the entire __call__ method, which was more than slightly embarrassing before. - Fixed Solr backend so that it properly converts native Python types to something Solr can handle. Thanks smulloni for the original patch! [Daniel Lindsley] - SearchResults now include a verbose name for display purposes. [Daniel Lindsley] - Fixed reverse order_by's when using Whoosh. Thanks matt_c for the original patch. [Daniel Lindsley] - Handle Whoosh stopwords behavior when provided a single character query string. [Daniel Lindsley] - Lightly refactored tests to only run engines with their own settings. [Daniel Lindsley] - Typo'd the tutorial when setting up your own SearchSite. Thanks mcroydon! 
[Daniel Lindsley] - Altered loading statements to only display when DEBUG is True. [Daniel Lindsley] - Write to STDERR where appropriate. Thanks zerok for suggesting this change. [Daniel Lindsley] - BACKWARD INCOMPATIBLE - Altered the search query param to 'q' instead of 'query'. Thanks simonw for prompting this change. [Daniel Lindsley] - Removed the Whoosh patch in favor of better options. Please see the documentation. [Daniel Lindsley] - Added Whoosh patch for 0.1.15 to temporarily fix reindexes. [Daniel Lindsley] - Altered the reindex command to handle inherited models. Thanks smulloni! [Daniel Lindsley] - Removed the no longer needed Whoosh patch. [Daniel Lindsley] Whoosh users should upgrade to the latest Whoosh (0.1.15) as it fixes the issues that the patch covers as well as others. - Documented the 'content' shortcut. [Daniel Lindsley] - Fixed an incorrect bit of documentation on the default operator setting. Thanks benspaulding! [Daniel Lindsley] - Added documentation about Haystack's various settings. [Daniel Lindsley] - Corrected an issue with the Whoosh backend that can occur when no indexes are registered. Now provides a better exception. [Daniel Lindsley] - Documentation fixes. Thanks benspaulding! [Daniel Lindsley] - Fixed Whoosh patch, which should help with the "KeyError" exceptions when searching with models. Thanks Matias Costa! [Daniel Lindsley] - Improvements to the setup.py. Thanks jezdez & ask! [Daniel Lindsley] - Fixed the .gitignore. Thanks ask! [Daniel Lindsley] - FacetedSearchView now inherits from SearchView. Thanks cyberdelia! [Daniel Lindsley] This will matter much more soon, as SearchView is going to be refactored to be more useful and extensible. - Documentation fixes. [Daniel Lindsley] - Altered the whoosh patch. Should apply cleanly now. [Daniel Lindsley] - Better linking to the search engine installation notes. [Daniel Lindsley] - Added documentation on setting up the search engines. 
[Daniel Lindsley] - Provide an exception when importing a backend dependency fails. Thanks brosner for the initial patch. [Daniel Lindsley] - Yay stupid typos! [Daniel Lindsley] - Relicensing under BSD. Thanks matt_c for threatening to use my name in an endorsement of a derived product! [Daniel Lindsley] - Fixed a bug in ModelSearchForm. Closes #1. Thanks dotsphinx! [Daniel Lindsley] - Added link to pysolr binding. [Daniel Lindsley] - Refined documentation on preparing SearchIndex data. [Daniel Lindsley] - Changed existing references from 'model_name' to 'module_name'. [Daniel Lindsley] This was done to be consistent both internally and with Django. Thanks brosner! - Documentation improvements. Restyled and friendlier intro page. [Daniel Lindsley] - Added documentation on preparing data. [Daniel Lindsley] - Additions and re-prioritizing the TODO list. [Daniel Lindsley] - Added warnings to Whoosh backend in place of silently ignoring unsupported features. [Daniel Lindsley] - Corrected Xapian's capabilities. Thanks richardb! [Daniel Lindsley] - BACKWARD INCOMPATIBLE - Altered all settings to be prefixed with HAYSTACK_. Thanks Collin! [Daniel Lindsley] - Test cleanup from previous commits. [Daniel Lindsley] - Changed the DEFAULT_OPERATOR back to 'AND'. Thanks richardb! [Daniel Lindsley] - Altered the way registrations get handled. [Daniel Lindsley] - Various fixes. Thanks brosner! [Daniel Lindsley] - Added new 'should_update' method to documentation. [Daniel Lindsley] - Added 'should_update' method to SearchIndexes. [Daniel Lindsley] This allows you to control, on a per-index basis, what conditions will cause an individual object to reindex. Useful for models that update frequently with changes that don't require indexing. - Added FAQ docs. [Daniel Lindsley] - Alter Whoosh backend to commit regardless. This avoids locking issues that can occur on higher volume sites. [Daniel Lindsley] - A more efficient implementation of index clearing in Whoosh. 
[Daniel Lindsley] - Added details about settings needed in settings.py. [Daniel Lindsley] - Added setup.py. Thanks cyberdelia for prompting it. [Daniel Lindsley] - Reindex management command now can reindex a limited range (like last 24 hours). Thanks traviscline. [Daniel Lindsley] - More things to do. [Daniel Lindsley] - Documentation formatting fixes. [Daniel Lindsley] - Added SearchBackend docs. [Daniel Lindsley] - Corrected reST formatting. [Daniel Lindsley] - Additional TODO's. [Daniel Lindsley] - Initial SearchIndex documentation. [Daniel Lindsley] - Formally introduced the TODO. [Daniel Lindsley] - Updated backend support list. [Daniel Lindsley] - Added initial documentation for SearchSites. [Daniel Lindsley] - Changed whoosh backend to fix limiting sets. Need to revisit someday. [Daniel Lindsley] - Added patch for Whoosh backend and version notes in documentation. [Daniel Lindsley] - Initial Whoosh backend complete. [Daniel Lindsley] Does not yet support highlighting or scoring. - Removed some unnecessary dummy code. [Daniel Lindsley] - Work on trying to get the default site to load reliably in all cases. [Daniel Lindsley] - Trimmed down the urls for tests now that the dummy backend works correctly. [Daniel Lindsley] - Dummy now correctly loads the right SearchBackend. [Daniel Lindsley] - Removed faceting from the default SearchView. [Daniel Lindsley] - Refactored tests so they are no longer within the haystack app. [Daniel Lindsley] Further benefits include less mocking and haystack's tests no longer contributing overall testing of end-user apps. Documentation included. - Removed old comment. [Daniel Lindsley] - Fixed a potential race condition. Also, since there's no way to tell when everything is ready to go in Django, adding an explicit call to SearchQuerySet's __init__ to force the site to load if it hasn't already. [Daniel Lindsley] - More tests on models() support. 
[Daniel Lindsley] - Pulled schema building out into the site to leverage across backends. [Daniel Lindsley] - Altered backend loading for consistency with Django and fixed the long-incorrect-for-non-obvious-and-tedious-reasons version number. Still beta but hopefully that changes soon. [Daniel Lindsley] - Missed a spot when fixing SearchSites. [Daniel Lindsley] - BACKWARD INCOMPATIBLE - Created a class name conflict during the last change (double use of ``SearchIndex``). Renamed original ``SearchIndex`` to ``SearchSite``, which is slightly more correct anyhow. [Daniel Lindsley] This will only affect you if you've custom built sites (i.e. not used ``autodiscover()``. - More documentation. Started docs on SearchQuery. [Daniel Lindsley] - Further fleshed out SearchQuerySet documentation. [Daniel Lindsley] - BACKWARD INCOMPATIBLE (2 of 2) - Altered autodiscover to search for 'search_indexes.py' instead of 'indexes.py' to prevent collisions and be more descriptive. [Daniel Lindsley] - BACKWARD INCOMPATIBLE (1 of 2) - The ModelIndex class has been renamed to be SearchIndex to make room for future improvements. [Daniel Lindsley] - Fleshed out a portion of the SearchQuerySet documentation. [Daniel Lindsley] - SearchQuerySet.auto_query now supports internal quoting for exact matches. [Daniel Lindsley] - Fixed semi-serious issue with SearchQuery objects, causing bits to leak from one query to the next when cloning. [Daniel Lindsley] - Altered Solr port for testing purposes. [Daniel Lindsley] - Now that Solr and core feature set are solid, moved haystack into beta status. [Daniel Lindsley] - Added simple capabilities for retrieving facets back. [Daniel Lindsley] - Bugfix to make sure model choices don't get loaded until after the IndexSite is populated. [Daniel Lindsley] - Initial faceting support complete. [Daniel Lindsley] - Query facets tested. [Daniel Lindsley] - Bugfix to (field) facets. 
[Daniel Lindsley] Using a dict is inappropriate, as the output from Solr is sorted by count. Now using a two-tuple. - Backward-incompatible changes to faceting. Date-based faceting is now present. [Daniel Lindsley] - Solr implementation of faceting started. Needs more tests. [Daniel Lindsley] - Initial faceting support in place. Needs more thought and a Solr implementation. [Daniel Lindsley] - Unbreak iterables in queries. [Daniel Lindsley] - Bugfixes for Unicode handling and loading deleted models. [Daniel Lindsley] - Fixed bug in Solr's run method. [Daniel Lindsley] - Various bug fixes. [Daniel Lindsley] - Backward-Incompatible: Refactored ModelIndexes to allow greater customization before indexing. See "prepare()" methods. [Daniel Lindsley] - Updated "build_solr_schema" command for revised fields. [Daniel Lindsley] - Refactored SearchFields. Lightly backwards-incompatible. [Daniel Lindsley] - No more duplicates from the "build_solr_schema" management command. [Daniel Lindsley] - Removed the kwargs. Explicit is better than implicit. [Daniel Lindsley] - Tests for highlighting. [Daniel Lindsley] - Added initial highlighting support. Needs tests and perhaps a better implementation. [Daniel Lindsley] - Started "build_solr_schema" command. Needs testing with more than one index. [Daniel Lindsley] - Argh. ".select_related()" is killing reindexes. Again. [Daniel Lindsley] - Stored fields now come back as part of the search result. [Daniel Lindsley] - Fixed Solr's SearchQuery.clean to handle reserved words more appropriately. [Daniel Lindsley] - Filter types seem solid and have tests. [Daniel Lindsley] - App renamed (for namespace/sanity/because it's really different reasons). [Daniel Lindsley] - Started trying to support the various filter types. Needs testing and verification. [Daniel Lindsley] - Fixed tests in light of the change to "OR". [Daniel Lindsley] - Readded "select_related" to reindex command. [Daniel Lindsley] - I am a moron. 
[Daniel Lindsley] - "OR" is now the default operator. Also, "auto_query" now handles not'ed keywords. [Daniel Lindsley] - "More Like This" now implemented and functioning with Solr backend. [Daniel Lindsley] - Removed broken references to __name__. [Daniel Lindsley] - Internal documentation fix. [Daniel Lindsley] - Solr backend can now clear on a per-model basis. [Daniel Lindsley] - Solr backend tests fleshed out. Initial stability of Solr. [Daniel Lindsley] This needs more work (as does everything) but it seems to be working reliably from my testing (both unit and "real-world"). Onward and upward. - Massive renaming/refactoring spree. Tests 100% passing again. [Daniel Lindsley] - Renamed BaseSearchQuerySet to SearchQuerySet. Now requires instantiation. [Daniel Lindsley] - Standardizing syntax. [Daniel Lindsley] - Backend support update. [Daniel Lindsley] - An attempt to make sure the main IndexSite is always setup, even outside web requests. Also needs improvement. [Daniel Lindsley] - Reindexes now work. [Daniel Lindsley] - Some painful bits to make things work for now. Needs improvement. [Daniel Lindsley] - Support kwargs on the search. [Daniel Lindsley] - Move solr backend tests in prep for fully testing the backend. [Daniel Lindsley] - Some ContentField/StoredField improvements. [Daniel Lindsley] StoredFields now have a unique template per field (as they should have from the start) and there's a touch more checking. You can also now override the template name for either type of field. - Fixed backend loading upon unpickling SearchBackend. [Daniel Lindsley] - Tweak internal doc. [Daniel Lindsley] - MOAR DOCS. [Daniel Lindsley] - Internal documentation and cleanup. Also alters the behavior of SearchQuerySet's "order_by" method slightly, bringing it more in-line with QuerySet's behavior. [Daniel Lindsley] - Documentation/license updates. [Daniel Lindsley] - Fixed ModelIndexes and created tests for them. 100% tests passing again. 
[Daniel Lindsley] - Started refactoring ModelIndexes. Needs tests (and possibly a little love). [Daniel Lindsley] - Implemented Solr's boost, clean, multiple order-by. Fixed Solr's score retrieval (depends on custom pysolr) and exact match syntax. [Daniel Lindsley] - Minor changes/cleanup. [Daniel Lindsley] - Updated docs and a FIXME. [Daniel Lindsley] - SearchView/SearchForm tests passing. [Daniel Lindsley] - Changed BaseSearchQuery to accept a SearchBackend instance instead of the class. [Daniel Lindsley] - Better dummy implementation, a bugfix to raw_search and SearchView/SearchForm tests. [Daniel Lindsley] - Temporarily changed the Solr backend to ignore fields. Pysolr will need a patch and then reenable this. [Daniel Lindsley] - Merge branch 'master' of ssh://daniel@mckenzie/home/daniel/djangosearch_refactor into HEAD. [Daniel Lindsley] - Started SearchView tests and added URLconf. [Daniel Lindsley] - Started SearchView tests and added URLconf. [Daniel Lindsley] - Added note about basic use. Needs refactoring. [Matt Croydon] - Merged index.rst. [Matt Croydon] - Fixed result lookups when constructing a SearchResult. [Daniel Lindsley] - Added more docs. [Daniel Lindsley] - Added FIXME for exploration on Solr backend. [Daniel Lindsley] - Solr's SearchQuery now handles phrases (exact match). [Daniel Lindsley] - More work on the Solr backend. [Daniel Lindsley] - Added more imports for future test coverage. [Daniel Lindsley] - Added stubs for backend tests. [Daniel Lindsley] - Documentation updates. [Daniel Lindsley] - Refactored forms/views. Needs tests. [Daniel Lindsley] - Removed old entries in .gitignore. [Daniel Lindsley] - Implemented load_all. [Daniel Lindsley] - Fixed query result retrieval. [Daniel Lindsley] - Updated documentation index and tweaked overview formatting. [Matt Croydon] - Slight docs improvements. [Daniel Lindsley] - Started work on Solr backend. [Daniel Lindsley] - Ignore _build. 
[Matt Croydon] - Refactored documentation to format better in Sphinx. [Matt Croydon] - Added _build to .gitignore. [Matt Croydon] - Added sphinx config for documentation. [Matt Croydon] - Verified _fill_cache behavior. 100% test pass. [Daniel Lindsley] - Added a couple new desirable bits of functionality. Mostly stubbed. [Daniel Lindsley] - Removed fixme and updated docs. [Daniel Lindsley] - Removed an old reference to SearchPaginator. [Daniel Lindsley] - Updated import paths to new backend Base* location. [Daniel Lindsley] - Relocated base backend classes to __init__.py for consistency with Django. [Daniel Lindsley] - BaseSearchQuerySet initial API complete and all but working. One failing test related to caching results. [Daniel Lindsley] - Added new (improved?) template path for index templates. [Daniel Lindsley] - Removed SearchPaginator, as it no longer provides anything over the standard Django Paginator. [Daniel Lindsley] - Added len/iter support to BaseSearchQuerySet. Need to finish getitem support and test. [Daniel Lindsley] - Started to update ModelIndex. [Daniel Lindsley] - Started to alter dummy to match new class names/API. [Daniel Lindsley] - Little bits of cleanup. [Daniel Lindsley] - Added overview of where functionality belongs in djangosearch. This should likely make it's way into other docs and go away eventually. [Daniel Lindsley] - BaseSearchQuery now tracks filters via QueryFilter objects. Tests complete for QueryFilter and nearly complete for BaseSearchQuery. [Daniel Lindsley] - Started docs on creating new backends. [Daniel Lindsley] - Started tests for BaseSearchQuery and BaseSearchQuerySet. [Daniel Lindsley] - Fixed site loading. [Daniel Lindsley] - More work on the Base* classes. [Daniel Lindsley] - Started docs on creating new backends. [Daniel Lindsley] - Yet more work on BaseSearchQuerySet. Now with fewer FIXMEs. [Daniel Lindsley] - More work on BaseSearchQuerySet and added initial BaseSearchQuery object. 
[Daniel Lindsley] - Removed another chunk of SearchPaginator as SearchQuerySet becomes more capable. Hopefully, SearchPaginator will simply go away soon. [Daniel Lindsley] - Fixed ModelSearchForm to check the site's registered models. [Daniel Lindsley] - Reenabled how other backends might load. [Daniel Lindsley] - Added ignores. [Daniel Lindsley] - Started documenting what backends are supported and what they can do. [Daniel Lindsley] - More work on SearchQuerySet. [Daniel Lindsley] - More renovation and IndexSite's tests pass 100%. [Daniel Lindsley] - Fleshed out sites tests. Need to setup environment in order to run them. [Daniel Lindsley] - Started adding tests. [Daniel Lindsley] - First blush at SearchQuerySet. Non-functional, trying to lay out API and basic funationality. [Daniel Lindsley] - Removed old results.py in favor of the coming SearchQuerySet. [Daniel Lindsley] - Noted future improvements on SearchPaginator. [Daniel Lindsley] - Removed old reference to autodiscover and added default site a la NFA. [Daniel Lindsley] - Commented another use of RELEVANCE. [Daniel Lindsley] - Little backend tweaks. [Daniel Lindsley] - Added autodiscover support. [Daniel Lindsley] - Readded management command. [Daniel Lindsley] - Added SearchView and ModelSearchForm back in. Needs a little work. [Daniel Lindsley] - Readded results. Need to look at SoC for ideas. [Daniel Lindsley] - Readded paginator. Needs docs/tests. [Daniel Lindsley] - Readded core backends + solr. Will add others as they reach 100% functionality. [Daniel Lindsley] - Added ModelIndex back in. Customized to match new setup. [Daniel Lindsley] - Added signal registration as well as some introspection capabilities. [Daniel Lindsley] - Initial commit. Basic IndexSite implementation complete. Needs tests. 
[Daniel Lindsley] django-haystack-2.8.0/docs/conf.py000066400000000000000000000151331325051407000170170ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Haystack documentation build configuration file, created by # sphinx-quickstart on Wed Apr 15 08:50:46 2009. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. from __future__ import absolute_import, division, print_function, unicode_literals import os import sys # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. #sys.path.append(os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8' # The master toctree document. master_doc = 'toc' # General information about the project. project = u'Haystack' copyright = u'2009-2016, Daniel Lindsley' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # The short X.Y version. version = '2.5' # The full version, including alpha/beta/rc tags. release = '2.5.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
#language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. #unused_docs = [] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. Major themes that come with # Sphinx are currently 'default' and 'sphinxdoc'. # html_theme = 'haystack_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = { # "rightsidebar": "true", # "bodyfont": "'Helvetica Neue', Arial, sans-serif", # "sidebarbgcolor": "#303c0c", # "sidebartextcolor": "#effbcb", # "sidebarlinkcolor": "#eef7ab", # "relbarbgcolor": "#caecff", # "relbartextcolor": "#262511", # "relbarlinkcolor": "#262511", # "footerbgcolor": "#262511", # } # Add any paths that contain custom themes here, relative to this directory. html_theme_path = ['.'] # The name for this set of Sphinx documents. 
If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. #html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_use_modindex = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = '' # Output file base name for HTML help builder. 
htmlhelp_basename = 'Haystackdoc' # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). #latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). #latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'Haystack.tex', u'Haystack Documentation', u'Daniel Lindsley', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # Additional stuff for the LaTeX preamble. #latex_preamble = '' # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_use_modindex = True django-haystack-2.8.0/docs/contributing.rst000066400000000000000000000122221325051407000207550ustar00rootroot00000000000000============ Contributing ============ Haystack is open-source and, as such, grows (or shrinks) & improves in part due to the community. Below are some guidelines on how to help with the project. Philosophy ========== * Haystack is BSD-licensed. All contributed code must be either * the original work of the author, contributed under the BSD, or... * work taken from another project released under a BSD-compatible license. * GPL'd (or similar) works are not eligible for inclusion. * Haystack's git master branch should always be stable, production-ready & passing all tests. * Major releases (1.x.x) are commitments to backward-compatibility of the public APIs. Any documented API should ideally not change between major releases. The exclusion to this rule is in the event of either a security issue or to accommodate changes in Django itself. 
* Minor releases (x.3.x) are for the addition of substantial features or major bugfixes. * Patch releases (x.x.4) are for minor features or bugfixes. Guidelines For Reporting An Issue/Feature ========================================= So you've found a bug or have a great idea for a feature. Here's the steps you should take to help get it added/fixed in Haystack: * First, check to see if there's an existing issue/pull request for the bug/feature. All issues are at https://github.com/toastdriven/django-haystack/issues and pull reqs are at https://github.com/toastdriven/django-haystack/pulls. * If there isn't one there, please file an issue. The ideal report includes: * A description of the problem/suggestion. * How to recreate the bug. * If relevant, including the versions of your: * Python interpreter * Django * Haystack * Search engine used (as well as bindings) * Optionally of the other dependencies involved * Ideally, creating a pull request with a (failing) test case demonstrating what's wrong. This makes it easy for us to reproduce & fix the problem. Instructions for running the tests are at :doc:`index` You might also hop into the IRC channel (``#haystack`` on ``irc.freenode.net``) & raise your question there, as there may be someone who can help you with a work-around. Guidelines For Contributing Code ================================ If you're ready to take the plunge & contribute back some code/docs, the process should look like: * Fork the project on GitHub into your own account. * Clone your copy of Haystack. * Make a new branch in git & commit your changes there. * Push your new branch up to GitHub. * Again, ensure there isn't already an issue or pull request out there on it. If there is & you feel you have a better fix, please take note of the issue number & mention it in your pull request. * Create a new pull request (based on your branch), including what the problem/feature is, versions of your software & referencing any related issues/pull requests. 
In order to be merged into Haystack, contributions must have the following: * A solid patch that: * is clear. * works across all supported versions of Python/Django. * follows the existing style of the code base (mostly PEP-8). * comments included as needed. * A test case that demonstrates the previous flaw that now passes with the included patch. * If it adds/changes a public API, it must also include documentation for those changes. * Must be appropriately licensed (see "Philosophy"). * Adds yourself to the AUTHORS file. If your contribution lacks any of these things, they will have to be added by a core contributor before being merged into Haystack proper, which may take substantial time for the all-volunteer team to get to. Guidelines For Core Contributors ================================ If you've been granted the commit bit, here's how to shepherd the changes in: * Any time you go to work on Haystack, please use ``git pull --rebase`` to fetch the latest changes. * Any new features/bug fixes must meet the above guidelines for contributing code (solid patch/tests passing/docs included). * Commits are typically cherry-picked onto a branch off master. * This is done so as not to include extraneous commits, as some people submit pull reqs based on their git master that has other things applied to it. * A set of commits should be squashed down to a single commit. * ``git merge --squash`` is a good tool for performing this, as is ``git rebase -i HEAD~N``. * This is done to prevent anyone using the git repo from accidently pulling work-in-progress commits. * Commit messages should use past tense, describe what changed & thank anyone involved. Examples:: """Added support for the latest version of Whoosh (v2.3.2).""" """Fixed a bug in ``solr_backend.py``. Thanks to joeschmoe for the report!""" """BACKWARD-INCOMPATIBLE: Altered the arguments passed to ``SearchBackend``. 
Further description appears here if the change warrants an explanation as to why it was done.""" * For any patches applied from a contributor, please ensure their name appears in the AUTHORS file. * When closing issues or pull requests, please reference the SHA in the closing message (i.e. ``Thanks! Fixed in SHA: 6b93f6``). GitHub will automatically link to it. django-haystack-2.8.0/docs/creating_new_backends.rst000066400000000000000000000014641325051407000225530ustar00rootroot00000000000000.. _ref-creating-new-backends: ===================== Creating New Backends ===================== The process should be fairly simple. #. Create new backend file. Name is important. #. Two classes inside. #. SearchBackend (inherit from haystack.backends.BaseSearchBackend) #. SearchQuery (inherit from haystack.backends.BaseSearchQuery) SearchBackend ============= Responsible for the actual connection and low-level details of interacting with the backend. * Connects to search engine * Method for saving new docs to index * Method for removing docs from index * Method for performing the actual query SearchQuery =========== Responsible for taking structured data about the query and converting it into a backend appropriate format. * Method for creating the backend specific query - ``build_query``. django-haystack-2.8.0/docs/debugging.rst000066400000000000000000000112331325051407000202020ustar00rootroot00000000000000.. ref-debugging: ================== Debugging Haystack ================== There are some common problems people run into when using Haystack for the first time. Some of the common problems and things to try appear below. .. note:: As a general suggestion, your best friend when debugging an issue is to use the ``pdb`` library included with Python. By dropping a ``import pdb; pdb.set_trace()`` in your code before the issue occurs, you can step through and examine variable/logic as you progress through. Make sure you don't commit those ``pdb`` lines though. 
"No module named haystack." =========================== This problem usually occurs when first adding Haystack to your project. * Are you using the ``haystack`` directory within your ``django-haystack`` checkout/install? * Is the ``haystack`` directory on your ``PYTHONPATH``? Alternatively, is ``haystack`` symlinked into your project? * Start a Django shell (``./manage.py shell``) and try ``import haystack``. You may receive a different, more descriptive error message. * Double-check to ensure you have no circular imports. (i.e. module A tries importing from module B which is trying to import from module A.) "No results found." (On the web page) ===================================== Several issues can cause no results to be found. Most commonly it is either not running a ``rebuild_index`` to populate your index or having a blank ``document=True`` field, resulting in no content for the engine to search on. * Do you have a ``search_indexes.py`` located within an installed app? * Do you have data in your database? * Have you run a ``./manage.py rebuild_index`` to index all of your content? * Try running ``./manage.py rebuild_index -v2`` for more verbose output to ensure data is being processed/inserted. * Start a Django shell (``./manage.py shell``) and try:: >>> from haystack.query import SearchQuerySet >>> sqs = SearchQuerySet().all() >>> sqs.count() * You should get back an integer > 0. If not, check the above and reindex. >>> sqs[0] # Should get back a SearchResult object. >>> sqs[0].id # Should get something back like 'myapp.mymodel.1'. >>> sqs[0].text # ... or whatever your document=True field is. * If you get back either ``u''`` or ``None``, it means that your data isn't making it into the main field that gets searched. You need to check that the field either has a template that uses the model data, a ``model_attr`` that pulls data directly from the model or a ``prepare/prepare_FOO`` method that populates the data at index time. 
* Check the template for your search page and ensure it is looping over the results properly. Also ensure that it's either accessing valid fields coming back from the search engine or that it's trying to access the associated model via the ``{{ result.object.foo }}`` lookup. "LockError: [Errno 17] File exists: '/path/to/whoosh_index/_MAIN_LOCK'" ======================================================================= This is a Whoosh-specific traceback. It occurs when the Whoosh engine in one process/thread is locks the index files for writing while another process/thread tries to access them. This is a common error when using ``RealtimeSignalProcessor`` with Whoosh under any kind of load, which is why it's only recommended for small sites or development. The only real solution is to set up a cron job that runs ``./manage.py rebuild_index`` (optionally with ``--age=24``) that runs nightly (or however often you need) to refresh the search indexes. Then disable the use of the ``RealtimeSignalProcessor`` within your settings. The downside to this is that you lose real-time search. For many people, this isn't an issue and this will allow you to scale Whoosh up to a much higher traffic. If this is not acceptable, you should investigate either the Solr or Xapian backends. "Failed to add documents to Solr: [Reason: None]" ================================================= This is a Solr-specific traceback. It generally occurs when there is an error with your ``HAYSTACK_CONNECTIONS[]['URL']``. Since Solr acts as a webservice, you should test the URL in your web browser. If you receive an error, you may need to change your URL. This can also be caused when using old versions of pysolr (2.0.9 and before) with httplib2 and including a trailing slash in your ``HAYSTACK_CONNECTIONS[]['URL']``. If this applies to you, please upgrade to the current version of pysolr. 
"Got an unexpected keyword argument 'boost'" ============================================ This is a Solr-specific traceback. This can also be caused when using old versions of pysolr (2.0.12 and before). Please upgrade your version of pysolr (2.0.13+). django-haystack-2.8.0/docs/faceting.rst000066400000000000000000000262521325051407000200360ustar00rootroot00000000000000.. _ref-faceting: ======== Faceting ======== What Is Faceting? ----------------- Faceting is a way to provide users with feedback about the number of documents which match terms they may be interested in. At its simplest, it gives document counts based on words in the corpus, date ranges, numeric ranges or even advanced queries. Faceting is particularly useful when trying to provide users with drill-down capabilities. The general workflow in this regard is: #. You can choose what you want to facet on. #. The search engine will return the counts it sees for that match. #. You display those counts to the user and provide them with a link. #. When the user chooses a link, you narrow the search query to only include those conditions and display the results, potentially with further facets. .. note:: Faceting can be difficult, especially in providing the user with the right number of options and/or the right areas to be able to drill into. This is unique to every situation and demands following what real users need. You may want to consider logging queries and looking at popular terms to help you narrow down how you can help your users. Haystack provides functionality so that all of the above steps are possible. From the ground up, let's build a faceted search setup. This assumes that you have been to work through the :doc:`tutorial` and have a working Haystack installation. The same setup from the :doc:`tutorial` applies here. 1. Determine Facets And ``SearchQuerySet`` ------------------------------------------ Determining what you want to facet on isn't always easy. 
For our purposes, we'll facet on the ``author`` field. In order to facet effectively, the search engine should store both a standard representation of your data as well as exact version to facet on. This is generally accomplished by duplicating the field and storing it via two different types. Duplication is suggested so that those fields are still searchable in the standard ways. To inform Haystack of this, you simply pass along a ``faceted=True`` parameter on the field(s) you wish to facet on. So to modify our existing example:: class NoteIndex(SearchIndex, indexes.Indexable): text = CharField(document=True, use_template=True) author = CharField(model_attr='user', faceted=True) pub_date = DateTimeField(model_attr='pub_date') Haystack quietly handles all of the backend details for you, creating a similar field to the type you specified with ``_exact`` appended. Our example would now have both a ``author`` and ``author_exact`` field, though this is largely an implementation detail. To pull faceting information out of the index, we'll use the ``SearchQuerySet.facet`` method to setup the facet and the ``SearchQuerySet.facet_counts`` method to retrieve back the counts seen. Experimenting in a shell (``./manage.py shell``) is a good way to get a feel for what various facets might look like:: >>> from haystack.query import SearchQuerySet >>> sqs = SearchQuerySet().facet('author') >>> sqs.facet_counts() { 'dates': {}, 'fields': { 'author': [ ('john', 4), ('daniel', 2), ('sally', 1), ('terry', 1), ], }, 'queries': {} } .. note:: Note that, despite the duplication of fields, you should provide the regular name of the field when faceting. Haystack will intelligently handle the underlying details and mapping. As you can see, we get back a dictionary which provides access to the three types of facets available: ``fields``, ``dates`` and ``queries``. 
Since we only faceted on the ``author`` field (which actually facets on the ``author_exact`` field managed by Haystack), only the ``fields`` key has any data associated with it. In this case, we have a corpus of eight documents with four unique authors. .. note:: Facets are chainable, like most ``SearchQuerySet`` methods. However, unlike most ``SearchQuerySet`` methods, they are *NOT* affected by ``filter`` or similar methods. The only method that has any effect on facets is the ``narrow`` method (which is how you provide drill-down). Configuring facet behaviour ~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can configure the behaviour of your facets by passing options for each facet in your SearchQuerySet. These options can be backend specific. **limit** *tested on Solr* The ``limit`` parameter limits the results for each query. On Solr, the default `facet.limit`_ is 100 and a negative number removes the limit. .. _facet.limit: https://wiki.apache.org/solr/SimpleFacetParameters#facet.limit Example usage:: >>> from haystack.query import SearchQuerySet >>> sqs = SearchQuerySet().facet('author', limit=-1) >>> sqs.facet_counts() { 'dates': {}, 'fields': { 'author': [ ('abraham', 1), ('benny', 2), ('cindy', 1), ('diana', 5), ], }, 'queries': {} } >>> sqs = SearchQuerySet().facet('author', limit=2) >>> sqs.facet_counts() { 'dates': {}, 'fields': { 'author': [ ('abraham', 1), ('benny', 2), ], }, 'queries': {} } **sort** *tested on Solr* The ``sort`` parameter will sort the results for each query. Solr's default `facet.sort`_ is ``index``, which will sort the facets alphabetically. Changing the parameter to ``count`` will sort the facets by the number of results for each facet value. .. 
_facet.sort: https://wiki.apache.org/solr/SimpleFacetParameters#facet.sort Example usage:: >>> from haystack.query import SearchQuerySet >>> sqs = SearchQuerySet().facet('author', sort='index', ) >>> sqs.facet_counts() { 'dates': {}, 'fields': { 'author': [ ('abraham', 1), ('benny', 2), ('cindy', 1), ('diana', 5), ], }, 'queries': {} } >>> sqs = SearchQuerySet().facet('author', sort='count', ) >>> sqs.facet_counts() { 'dates': {}, 'fields': { 'author': [ ('diana', 5), ('benny', 2), ('abraham', 1), ('cindy', 1), ], }, 'queries': {} } Now that we have the facet we want, it's time to implement it. 2. Switch to the ``FacetedSearchView`` and ``FacetedSearchForm`` ---------------------------------------------------------------- There are three things that we'll need to do to expose facets to our frontend. The first is construct the ``SearchQuerySet`` we want to use. We should have that from the previous step. The second is to switch to the ``FacetedSearchView``. This view is useful because it prepares the facet counts and provides them in the context as ``facets``. Optionally, the third step is to switch to the ``FacetedSearchForm``. As it currently stands, this is only useful if you want to provide drill-down, though it may provide more functionality in the future. We'll do it for the sake of having it in place but know that it's not required. In your URLconf, you'll need to switch to the ``FacetedSearchView``. Your URLconf should resemble:: from django.conf.urls import url from haystack.forms import FacetedSearchForm from haystack.views import FacetedSearchView urlpatterns = [ url(r'^$', FacetedSearchView(form_class=FacetedSearchForm, facet_fields=['author']), name='haystack_search'), ] The ``FacetedSearchView`` will now instantiate the ``FacetedSearchForm``. The specified ``facet_fields`` will be present in the context variable ``facets``. This is added in an overridden ``extra_context`` method. 3. 
Display The Facets In The Template ------------------------------------- Templating facets involves simply adding an extra bit of processing to display the facets (and optionally to link to provide drill-down). An example template might look like this::
    <form method="get" action=".">
        <table>
            <tbody>
                {{ form.as_table }}
            </tbody>
        </table>

        {% if query %}
            <!-- Begin faceting. -->
            <h2>By Author</h2>

            <div>
                <dl>
                    {% if facets.fields.author %}
                        <dt>Author</dt>
                        {# Provide only the top 5 authors #}
                        {% for author in facets.fields.author|slice:":5" %}
                            <dd><a href="{{ request.get_full_path }}&amp;selected_facets=author_exact:{{ author.0|urlencode }}">{{ author.0 }}</a> ({{ author.1 }})</dd>
                        {% endfor %}
                    {% else %}
                        <dt>No author facets.</dt>
                    {% endif %}
                </dl>
            </div>
            <!-- End faceting. -->

            <!-- Display results... -->
            {% for result in page.object_list %}
                <div class="search_result">
                    <h3><a href="{{ result.object.get_absolute_url }}">{{ result.object.title }}</a></h3>
                    <p>{{ result.object.body|truncatewords:80 }}</p>
                </div>
            {% empty %}
                <p>Sorry, no results found.</p>
{% endfor %} {% endif %} Displaying the facets is a matter of looping through the facets you want and providing the UI to suit. The ``author.0`` is the facet text from the backend and the ``author.1`` is the facet count. 4. Narrowing The Search ----------------------- We've also set ourselves up for the last bit, the drill-down aspect. By appending on the ``selected_facets`` to the URLs, we're informing the ``FacetedSearchForm`` that we want to narrow our results to only those containing the author we provided. For a concrete example, if the facets on author come back as:: { 'dates': {}, 'fields': { 'author': [ ('john', 4), ('daniel', 2), ('sally', 1), ('terry', 1), ], }, 'queries': {} } You should present a list similar to:: .. warning:: Haystack can automatically handle most details around faceting. However, since ``selected_facets`` is passed directly to narrow, it must use the duplicated field name. Improvements to this are planned but incomplete. This is simply the default behavior but it is possible to override or provide your own form which does additional processing. You could also write your own faceted ``SearchView``, which could provide additional/different facets based on facets chosen. There is a wide range of possibilities available to help the user navigate your content. django-haystack-2.8.0/docs/faq.rst000066400000000000000000000114241325051407000170200ustar00rootroot00000000000000.. _ref-frequently-asked-questions: ============================== (In)Frequently Asked Questions ============================== What is Haystack? ================= Haystack is meant to be a portable interface to a search engine of your choice. Some might call it a search framework, an abstraction layer or what have you. The idea is that you write your search code once and should be able to freely switch between backends as your situation necessitates. Why should I consider using Haystack? 
===================================== Haystack is targeted at the following use cases: * If you want to feature search on your site and search solutions like Google or Yahoo search don't fit your needs. * If you want to be able to customize your search and search on more than just the main content. * If you want to have features like drill-down (faceting) or "More Like This". * If you want a interface that is non-search engine specific, allowing you to change your mind later without much rewriting. When should I not be using Haystack? ==================================== * Non-Model-based data. If you just want to index random data (flat files, alternate sources, etc.), Haystack isn't a good solution. Haystack is very ``Model``-based and doesn't work well outside of that use case. * Ultra-high volume. Because of the very nature of Haystack (abstraction layer), there's more overhead involved. This makes it portable, but as with all abstraction layers, you lose a little performance. You also can't take full advantage of the exact feature-set of your search engine. This is the price of pluggable backends. Why was Haystack created when there are so many other search options? ===================================================================== The proliferation of search options in Django is a relatively recent development and is actually one of the reasons for Haystack's existence. There are too many options that are only partial solutions or are too engine specific. Further, most use an unfamiliar API and documentation is lacking in most cases. Haystack is an attempt to unify these efforts into one solution. That's not to say there should be no alternatives, but Haystack should provide a good solution to 80%+ of the search use cases out there. What's the history behind Haystack? 
=================================== Haystack started because of my frustration with the lack of good search options (before many other apps came out) and as the result of extensive use of Djangosearch. Djangosearch was a decent solution but had a number of shortcomings, such as: * Tied to the models.py, so you'd have to modify the source of third-party ( or django.contrib) apps in order to effectively use it. * All or nothing approach to indexes. So all indexes appear on all sites and in all places. * Lack of tests. * Lack of documentation. * Uneven backend implementations. The initial idea was to simply fork Djangosearch and improve on these (and other issues). However, after stepping back, I decided to overhaul the entire API (and most of the underlying code) to be more representative of what I would want as an end-user. The result was starting afresh and reusing concepts (and some code) from Djangosearch as needed. As a result of this heritage, you can actually still find some portions of Djangosearch present in Haystack (especially in the ``SearchIndex`` and ``SearchBackend`` classes) where it made sense. The original authors of Djangosearch are aware of this and thus far have seemed to be fine with this reuse. Why doesn't have a backend included in Haystack? ================================================================== Several possibilities on this. #. Licensing A common problem is that the Python bindings for a specific engine may have been released under an incompatible license. The goal is for Haystack to remain BSD licensed and importing bindings with an incompatible license can technically convert the entire codebase to that license. This most commonly occurs with GPL'ed bindings. #. Lack of time The search engine in question may be on the list of backends to add and we simply haven't gotten to it yet. We welcome patches for additional backends. #. 
Incompatible API In order for an engine to work well with Haystack, a certain baseline set of features is needed. This is often an issue when the engine doesn't support ranged queries or additional attributes associated with a search record. #. We're not aware of the engine If you think we may not be aware of the engine you'd like, please tell us about it (preferably via the group - http://groups.google.com/group/django-haystack/). Be sure to check through the backends (in case it wasn't documented) and search the history on the group to minimize duplicates. django-haystack-2.8.0/docs/glossary.rst000066400000000000000000000062521325051407000201170ustar00rootroot00000000000000.. _ref-glossary: ======== Glossary ======== Search is a domain full of its own jargon and definitions. As this may be an unfamiliar territory to many developers, what follows are some commonly used terms and what they mean. Engine An engine, for the purposes of Haystack, is a third-party search solution. It might be a full service (i.e. Solr_) or a library to build an engine with (i.e. Whoosh_) .. _Solr: http://lucene.apache.org/solr/ .. _Whoosh: https://bitbucket.org/mchaput/whoosh/ Index The datastore used by the engine is called an index. Its structure can vary wildly between engines but commonly they resemble a document store. This is the source of all information in Haystack. Document A document is essentially a record within the index. It usually contains at least one blob of text that serves as the primary content the engine searches and may have additional data hung off it. Corpus A term for a collection of documents. When talking about the documents stored by the engine (rather than the technical implementation of the storage), this term is commonly used. Field Within the index, each document may store extra data with the main content as a field. Also sometimes called an attribute, this usually represents metadata or extra content about the document. 
Haystack can use these fields for filtering and display. Term A term is generally a single word (or word-like) string of characters used in a search query. Stemming A means of determining if a word has any root words. This varies by language, but in English, this generally consists of removing plurals, an action form of the word, et cetera. For instance, in English, 'giraffes' would stem to 'giraffe'. Similarly, 'exclamation' would stem to 'exclaim'. This is useful for finding variants of the word that may appear in other documents. Boost Boost provides a means to take a term or phrase from a search query and alter the relevance of a result based on if that term is found in the result, a form of weighting. For instance, if you wanted to more heavily weight results that included the word 'zebra', you'd specify a boost for that term within the query. More Like This Incorporating techniques from information retrieval and artificial intelligence, More Like This is a technique for finding other documents within the index that closely resemble the document in question. This is useful for programmatically generating a list of similar content for a user to browse based on the current document they are viewing. Faceting Faceting is a way to provide insight to the user into the contents of your corpus. In its simplest form, it is a set of document counts returned with results when performing a query. These counts can be used as feedback for the user, allowing the user to choose interesting aspects of their search results and "drill down" into those results. An example might be providing a facet on an ``author`` field, providing back a list of authors and the number of documents in the index they wrote. This could be presented to the user with a link, allowing the user to click and narrow their original search to all results by that author. 
django-haystack-2.8.0/docs/haystack_theme/000077500000000000000000000000001325051407000205065ustar00rootroot00000000000000django-haystack-2.8.0/docs/haystack_theme/layout.html000066400000000000000000000011661325051407000227150ustar00rootroot00000000000000{% extends "basic/layout.html" %} {%- block extrahead %} {% endblock %} {%- block header %} {% endblock %}django-haystack-2.8.0/docs/haystack_theme/static/000077500000000000000000000000001325051407000217755ustar00rootroot00000000000000django-haystack-2.8.0/docs/haystack_theme/static/documentation.css000066400000000000000000000024551325051407000253660ustar00rootroot00000000000000a, a:link, a:hover { background-color: transparent !important; color: #CAECFF; outline-color: transparent !important; text-decoration: underline; } dl dt { text-decoration: underline; } dl.class dt, dl.method dt { background-color: #444444; padding: 5px; text-decoration: none; } tt.descname { font-weight: normal; } dl.method dt span.optional { font-weight: normal; } div#header { margin-bottom: 0px; } div.document, div.related, div.footer { width: 900px; margin: 0 auto; } div.document { margin-top: 10px; } div.related { background-color: #262511; padding-left: 10px; padding-right: 10px; } div.documentwrapper { width:640px; float:left;} div.body h1, div.body h2, div.body h3, div.body h4, div.body h5, div.body h6 { background-color: #053211; font-weight: normal; border-bottom: 2px solid #262511; margin: 20px -20px 10px -20px; padding: 3px 0 3px 10px; } div.sphinxsidebar { width:220px; float:right;} div.sphinxsidebar ul { padding-left: 10px; } div.sphinxsidebar ul ul { padding-left: 10px; margin-left: 10px; } div.bodywrapper { margin: 0px; } div.highlight-python, div.highlight { background-color: #262511; margin-bottom: 10px; padding: 10px; } div.footer { background-color:#262511; font-size: 90%; padding: 10px; } table thead { background-color: #053211; border-bottom: 1px solid #262511; 
}django-haystack-2.8.0/docs/haystack_theme/theme.conf000066400000000000000000000000271325051407000224560ustar00rootroot00000000000000[theme] inherit = basicdjango-haystack-2.8.0/docs/highlighting.rst000066400000000000000000000061601325051407000207170ustar00rootroot00000000000000.. _ref-highlighting: ============ Highlighting ============ Haystack supports two different methods of highlighting. You can either use ``SearchQuerySet.highlight`` or the built-in ``{% highlight %}`` template tag, which uses the ``Highlighter`` class. Each approach has advantages and disadvantages you need to weigh when deciding which to use. If you want portable, flexible, decently fast code, the ``{% highlight %}`` template tag (or manually using the underlying ``Highlighter`` class) is the way to go. On the other hand, if you care more about speed and will only ever be using one backend, ``SearchQuerySet.highlight`` may suit your needs better. Use of ``SearchQuerySet.highlight`` is documented in the :doc:`searchqueryset_api` documentation and the ``{% highlight %}`` tag is covered in the :doc:`templatetags` documentation, so the rest of this material will cover the ``Highlighter`` implementation. ``Highlighter`` --------------- The ``Highlighter`` class is a pure-Python implementation included with Haystack that's designed for flexibility. If you use the ``{% highlight %}`` template tag, you'll be automatically using this class. You can also use it manually in your code. For example:: >>> from haystack.utils.highlighting import Highlighter >>> my_text = 'This is a sample block that would be more meaningful in real life.' >>> my_query = 'block meaningful' >>> highlight = Highlighter(my_query) >>> highlight.highlight(my_text) u'...block that would be more meaningful in real life.' The default implementation takes three optional kwargs: ``html_tag``, ``css_class`` and ``max_length``. 
These allow for basic customizations to the output, like so:: >>> from haystack.utils.highlighting import Highlighter >>> my_text = 'This is a sample block that would be more meaningful in real life.' >>> my_query = 'block meaningful' >>> highlight = Highlighter(my_query, html_tag='div', css_class='found', max_length=35) >>> highlight.highlight(my_text) u'...
    <div class="found">block</div> that would be more <div class="found">meaningful</div>
...' Further, if this implementation doesn't suit your needs, you can define your own custom highlighter class. As long as it implements the API you've just seen, it can highlight however you choose. For example:: # In ``myapp/utils.py``... from haystack.utils.highlighting import Highlighter class BorkHighlighter(Highlighter): def render_html(self, highlight_locations=None, start_offset=None, end_offset=None): highlighted_chunk = self.text_block[start_offset:end_offset] for word in self.query_words: highlighted_chunk = highlighted_chunk.replace(word, 'Bork!') return highlighted_chunk Then set the ``HAYSTACK_CUSTOM_HIGHLIGHTER`` setting to ``myapp.utils.BorkHighlighter``. Usage would then look like:: >>> highlight = BorkHighlighter(my_query) >>> highlight.highlight(my_text) u'Bork! that would be more Bork! in real life.' Now the ``{% highlight %}`` template tag will also use this highlighter. django-haystack-2.8.0/docs/index.rst000066400000000000000000000053771325051407000173720ustar00rootroot00000000000000Welcome to Haystack! ==================== Haystack provides modular search for Django. It features a unified, familiar API that allows you to plug in different search backends (such as Solr_, Elasticsearch_, Whoosh_, Xapian_, etc.) without having to modify your code. .. _Solr: http://lucene.apache.org/solr/ .. _Elasticsearch: http://elasticsearch.org/ .. _Whoosh: https://bitbucket.org/mchaput/whoosh/ .. _Xapian: http://xapian.org/ .. note:: This documentation represents the current version of Haystack. 
For old versions of the documentation: * v2.5.X: https://django-haystack.readthedocs.io/en/v2.5.1/ * v2.4.X: https://django-haystack.readthedocs.io/en/v2.4.1/ * v2.3.X: https://django-haystack.readthedocs.io/en/v2.3.0/ * v2.2.X: https://django-haystack.readthedocs.io/en/v2.2.0/ * v2.1.X: https://django-haystack.readthedocs.io/en/v2.1.0/ * v2.0.X: https://django-haystack.readthedocs.io/en/v2.0.0/ * v1.2.X: https://django-haystack.readthedocs.io/en/v1.2.7/ * v1.1.X: https://django-haystack.readthedocs.io/en/v1.1/ Getting Started --------------- If you're new to Haystack, you may want to start with these documents to get you up and running: .. toctree:: :maxdepth: 2 tutorial .. toctree:: :maxdepth: 1 views_and_forms templatetags glossary management_commands faq who_uses other_apps installing_search_engines debugging changelog contributing python3 migration_from_1_to_2 Advanced Uses ------------- Once you've got Haystack working, here are some of the more complex features you may want to include in your application. .. toctree:: :maxdepth: 1 best_practices highlighting faceting autocomplete boost signal_processors multiple_index rich_content_extraction spatial admin Reference --------- If you're an experienced user and are looking for a reference, you may be looking for API documentation and advanced usage as detailed in: .. toctree:: :maxdepth: 2 searchqueryset_api searchindex_api inputtypes searchfield_api searchresult_api searchquery_api searchbackend_api architecture_overview backend_support settings utils Developing ---------- Finally, if you're looking to help out with the development of Haystack, the following links should help guide you on running tests and creating additional backends: .. toctree:: :maxdepth: 1 running_tests creating_new_backends Requirements ------------ Haystack has a relatively easily-met set of requirements. 
* Python 2.7+ or Python 3.3+ * A supported version of Django: https://www.djangoproject.com/download/#supported-versions Additionally, each backend has its own requirements. You should refer to :doc:`installing_search_engines` for more details. django-haystack-2.8.0/docs/inputtypes.rst000066400000000000000000000120621325051407000204740ustar00rootroot00000000000000.. _ref-inputtypes: =========== Input Types =========== Input types allow you to specify more advanced query behavior. They serve as a way to alter the query, often in backend-specific ways, without altering your Python code; as well as enabling use of more advanced features. Input types currently are only useful with the ``filter/exclude`` methods on ``SearchQuerySet``. Expanding this support to other methods is on the roadmap. Available Input Types ===================== Included with Haystack are the following input types: ``Raw`` ------- .. class:: haystack.inputs.Raw Raw allows you to specify backend-specific query syntax. If Haystack doesn't provide a way to access special query functionality, you can make use of this input type to pass it along. Example:: # Fielded. sqs = SearchQuerySet().filter(author=Raw('daniel OR jones')) # Non-fielded. # See ``AltParser`` for a better way to construct this. sqs = SearchQuerySet().filter(content=Raw('{!dismax qf=author mm=1}haystack')) ``Clean`` --------- .. class:: haystack.inputs.Clean ``Clean`` takes standard user (untrusted) input and sanitizes it. It ensures that no unintended operators or special characters make it into the query. This is roughly analogous to Django's ``autoescape`` support. .. note:: By default, if you hand a ``SearchQuerySet`` a bare string, it will get wrapped in this class. Example:: # This becomes "daniel or jones". sqs = SearchQuerySet().filter(content=Clean('daniel OR jones')) # Things like ``:`` & ``/`` get escaped. sqs = SearchQuerySet().filter(url=Clean('http://www.example.com')) # Equivalent (automatically wrapped in ``Clean``). 
sqs = SearchQuerySet().filter(url='http://www.example.com') ``Exact`` --------- .. class:: haystack.inputs.Exact ``Exact`` allows for making sure a phrase is exactly matched, unlike the usual ``AND`` lookups, where words may be far apart. Example:: sqs = SearchQuerySet().filter(author=Exact('n-gram support')) # Equivalent. sqs = SearchQuerySet().filter(author__exact='n-gram support') ``Not`` ------- .. class:: haystack.inputs.Not ``Not`` allows negation of the query fragment it wraps. As ``Not`` is a subclass of ``Clean``, it will also sanitize the query. This is generally only used internally. Most people prefer to use the ``SearchQuerySet.exclude`` method. Example:: sqs = SearchQuerySet().filter(author=Not('daniel')) ``AutoQuery`` ------------- .. class:: haystack.inputs.AutoQuery ``AutoQuery`` takes a more complex user query (that includes simple, standard query syntax bits) & forms a proper query out of them. It also handles sanitizing that query using ``Clean`` to ensure the query doesn't break. ``AutoQuery`` accommodates for handling regular words, NOT-ing words & extracting exact phrases. Example:: # Against the main text field with an accidental ":" before "search". # Generates a query like ``haystack (NOT whoosh) "fast search"`` sqs = SearchQuerySet().filter(content=AutoQuery('haystack -whoosh "fast :search"')) # Equivalent. sqs = SearchQuerySet().auto_query('haystack -whoosh "fast :search"') # Fielded. sqs = SearchQuerySet().filter(author=AutoQuery('daniel -day -lewis')) ``AltParser`` ------------- .. class:: haystack.inputs.AltParser ``AltParser`` lets you specify that a portion of the query should use a separate parser in the search engine. This is search-engine-specific, so it may decrease the portability of your app. Currently only supported under Solr. Example:: # DisMax. sqs = SearchQuerySet().filter(content=AltParser('dismax', 'haystack', qf='text', mm=1)) # Prior to the spatial support, you could do... 
sqs = SearchQuerySet().filter(content=AltParser('dismax', 'haystack', qf='author', mm=1)) Creating Your Own Input Types ============================= Building your own input type is relatively simple. All input types are simple classes that provide an ``__init__`` & a ``prepare`` method. The ``__init__`` may accept any ``args/kwargs``, though the typical use usually just involves a query string. The ``prepare`` method lets you alter the query the user provided before it becomes of the main query. It is lazy, called as late as possible, right before the final query is built & shipped to the engine. A full, if somewhat silly, example looks like:: from haystack.inputs import Clean class NoShoutCaps(Clean): input_type_name = 'no_shout_caps' # This is the default & doesn't need to be specified. post_process = True def __init__(self, query_string, **kwargs): # Stash the original, if you need it. self.original = query_string super(NoShoutCaps, self).__init__(query_string, **kwargs) def prepare(self, query_obj): # We need a reference to the current ``SearchQuery`` object this # will run against, in case we need backend-specific code. query_string = super(NoShoutCaps, self).prepare(query_obj) # Take that, capital letters! return query_string.lower() django-haystack-2.8.0/docs/installing_search_engines.rst000066400000000000000000000221161325051407000234520ustar00rootroot00000000000000.. _ref-installing-search-engines: ========================= Installing Search Engines ========================= Solr ==== Official Download Location: http://www.apache.org/dyn/closer.cgi/lucene/solr/ Solr is Java but comes in a pre-packaged form that requires very little other than the JRE and Jetty. It's very performant and has an advanced featureset. Haystack suggests using Solr 6.x, though it's possible to get it working on Solr 4.x+ with a little effort. 
Installation is relatively simple: For Solr 6.X:: curl -LO https://archive.apache.org/dist/lucene/solr/x.Y.0/solr-X.Y.0.tgz tar -C solr -xf solr-X.Y.0.tgz --strip-components=1 cd solr ./bin/solr create -c tester -n basic_config By default this will create a core with a managed schema. This setup is dynamic but not useful for haystack, and we'll need to configure solr to use a static (classic) schema. Haystack can generate a viable schema.xml and solrconfig.xml for you from your application and reload the core for you (once Haystack is installed and setup). To do this run: ``./manage.py build_solr_schema --configure-directory= --reload-core``. In this example CoreConfigDir is something like ``../solr-6.5.0/server/solr/tester/conf``, and ``--reload-core`` is what triggers reloading of the core. Please refer to ``build_solr_schema`` in the :doc:`management-commands` for required configuration. For Solr 4.X:: curl -LO https://archive.apache.org/dist/lucene/solr/4.10.2/solr-4.10.2.tgz tar xvzf solr-4.10.2.tgz cd solr-4.10.2 cd example java -jar start.jar You’ll need to revise your schema. You can generate this from your application (once Haystack is installed and setup) by running ``./manage.py build_solr_schema``. Take the output from that command and place it in ``solr-4.10.2/example/solr/collection1/conf/schema.xml``. Then restart Solr. .. warning:: Please note; the template filename, the file YOU supply under TEMPLATE_DIR/search_configuration has changed to schema.xml from solr.xml. The previous template name solr.xml was a legacy holdover from older versions of solr. You'll also need a Solr binding, ``pysolr``. The official ``pysolr`` package, distributed via PyPI, is the best version to use (2.1.0+). Place ``pysolr.py`` somewhere on your ``PYTHONPATH``. .. note:: ``pysolr`` has its own dependencies that aren't covered by Haystack. See https://pypi.python.org/pypi/pysolr for the latest documentation. 
Simplest approach is to install using ``pip install pysolr`` More Like This -------------- To enable the "More Like This" functionality in Haystack, you'll need to enable the ``MoreLikeThisHandler``. Add the following line to your ``solrconfig.xml`` file within the ``config`` tag:: Spelling Suggestions -------------------- To enable the spelling suggestion functionality in Haystack, you'll need to enable the ``SpellCheckComponent``. The first thing to do is create a special field on your ``SearchIndex`` class that mirrors the ``text`` field, but uses ``FacetCharField``. This disables the post-processing that Solr does, which can mess up your suggestions. Something like the following is suggested:: class MySearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) # ... normal fields then... suggestions = indexes.FacetCharField() def prepare(self, obj): prepared_data = super(MySearchIndex, self).prepare(obj) prepared_data['suggestions'] = prepared_data['text'] return prepared_data Then, you enable it in Solr by adding the following line to your ``solrconfig.xml`` file within the ``config`` tag:: text_general default text solr.DirectSolrSpellChecker internal 0.5 2 1 5 4 0.01 Then change your default handler from:: explicit 10 ... to ...:: explicit 10 default on true 10 5 5 true true 10 5 spellcheck Be warned that the ``suggestions`` portion will be specific to your ``SearchIndex`` classes (in this case, assuming the main field is called ``text``). Elasticsearch ============= Official Download Location: http://www.elasticsearch.org/download/ Elasticsearch is Java but comes in a pre-packaged form that requires very little other than the JRE. It's also very performant, scales easily and has an advanced featureset. Haystack currently only supports Elasticsearch 1.x and 2.x. Elasticsearch 5.x is not supported yet, if you would like to help, please see `#1383 `_. 
Installation is best done using a package manager:: # On Mac OS X... brew install elasticsearch # On Ubuntu... apt-get install elasticsearch # Then start via: elasticsearch -f -D es.config= # Example: elasticsearch -f -D es.config=/usr/local/Cellar/elasticsearch/0.90.0/config/elasticsearch.yml You may have to alter the configuration to run on ``localhost`` when developing locally. Modifications should be done in a YAML file, the stock one being ``config/elasticsearch.yml``:: # Unicast Discovery (disable multicast) discovery.zen.ping.multicast.enabled: false discovery.zen.ping.unicast.hosts: ["127.0.0.1"] # Name your cluster here to whatever. # My machine is called "Venus", so... cluster: name: venus network: host: 127.0.0.1 path: logs: /usr/local/var/log data: /usr/local/var/data You'll also need an Elasticsearch binding: elasticsearch_ (**NOT** ``pyes``). Place ``elasticsearch`` somewhere on your ``PYTHONPATH`` (usually ``python setup.py install`` or ``pip install elasticsearch``). .. _elasticsearch: http://pypi.python.org/pypi/elasticsearch/ .. note:: ``elasticsearch`` has its own dependencies that aren't covered by Haystack. You'll also need ``urllib3``. Whoosh ====== Official Download Location: http://bitbucket.org/mchaput/whoosh/ Whoosh is pure Python, so it's a great option for getting started quickly and for development, though it does work for small scale live deployments. The current recommended version is 1.3.1+. You can install via PyPI_ using ``sudo easy_install whoosh`` or ``sudo pip install whoosh``. Note that, while capable otherwise, the Whoosh backend does not currently support "More Like This" or faceting. Support for these features has recently been added to Whoosh itself & may be present in a future release. .. _PyPI: http://pypi.python.org/pypi/Whoosh/ Xapian ====== Official Download Location: http://xapian.org/download Xapian is written in C++ so it requires compilation (unless your OS has a package for it). 
Installation looks like:: curl -O http://oligarchy.co.uk/xapian/1.2.18/xapian-core-1.2.18.tar.xz curl -O http://oligarchy.co.uk/xapian/1.2.18/xapian-bindings-1.2.18.tar.xz unxz xapian-core-1.2.18.tar.xz unxz xapian-bindings-1.2.18.tar.xz tar xvf xapian-core-1.2.18.tar tar xvf xapian-bindings-1.2.18.tar cd xapian-core-1.2.18 ./configure make sudo make install cd .. cd xapian-bindings-1.2.18 ./configure make sudo make install Xapian is a third-party supported backend. It is not included in Haystack proper due to licensing. To use it, you need both Haystack itself as well as ``xapian-haystack``. You can download the source from http://github.com/notanumber/xapian-haystack/tree/master. Installation instructions can be found on that page as well. The backend, written by David Sauve (notanumber), fully implements the `SearchQuerySet` API and is an excellent alternative to Solr. django-haystack-2.8.0/docs/management_commands.rst000066400000000000000000000234501325051407000222500ustar00rootroot00000000000000.. _ref-management-commands: =================== Management Commands =================== Haystack comes with several management commands to make working with Haystack easier. ``clear_index`` =============== The ``clear_index`` command wipes out your entire search index. Use with caution. In addition to the standard management command options, it accepts the following arguments: ``--noinput``: If provided, the interactive prompts are skipped and the index is unceremoniously wiped out. ``--verbosity``: Accepted but ignored. ``--using``: Update only the named backend (can be used multiple times). By default, all backends will be updated. ``--nocommit``: If provided, it will pass commit=False to the backend. This means that the update will not become immediately visible and will depend on another explicit commit or the backend's commit strategy to complete the update. 
By default, this is an **INTERACTIVE** command and assumes that you do **NOT** wish to delete the entire index. .. note:: The ``--nocommit`` argument is only supported by the Solr backend. .. warning:: Depending on the backend you're using, this may simply delete the entire directory, so be sure your ``HAYSTACK_CONNECTIONS[]['PATH']`` setting is correctly pointed at just the index directory. ``update_index`` ================ .. note:: If you use the ``--start/--end`` flags on this command, you'll need to install dateutil_ to handle the datetime parsing. .. _dateutil: http://pypi.python.org/pypi/python-dateutil/1.5 The ``update_index`` command will freshen all of the content in your index. It iterates through all indexed models and updates the records in the index. In addition to the standard management command options, it accepts the following arguments: ``--age``: Number of hours back to consider objects new. Useful for nightly reindexes (``--age=24``). Requires ``SearchIndexes`` to implement the ``get_updated_field`` method. Default is ``None``. ``--start``: The start date for indexing within. Can be any dateutil-parsable string, recommended to be YYYY-MM-DDTHH:MM:SS. Requires ``SearchIndexes`` to implement the ``get_updated_field`` method. Default is ``None``. ``--end``: The end date for indexing within. Can be any dateutil-parsable string, recommended to be YYYY-MM-DDTHH:MM:SS. Requires ``SearchIndexes`` to implement the ``get_updated_field`` method. Default is ``None``. ``--batch-size``: Number of items to index at once. Default is 1000. ``--remove``: Remove objects from the index that are no longer present in the database. ``--workers``: Allows for the use multiple workers to parallelize indexing. Requires ``multiprocessing``. ``--verbosity``: If provided, dumps out more information about what's being done. * ``0`` = No output * ``1`` = Minimal output describing what models were indexed and how many records. 
* ``2`` = Full output, including everything from ``1`` plus output on each batch that is indexed, which is useful when debugging. ``--using``: Update only the named backend (can be used multiple times). By default, all backends will be updated. ``--nocommit``: If provided, it will pass commit=False to the backend. This means that the updates will not become immediately visible and will depend on another explicit commit or the backend's commit strategy to complete the update. .. note:: The ``--nocommit`` argument is only supported by the Solr and ElasticSearch backends. Examples:: # Update everything. ./manage.py update_index --settings=settings.prod # Update everything with lots of information about what's going on. ./manage.py update_index --settings=settings.prod --verbosity=2 # Update everything, cleaning up after deleted models. ./manage.py update_index --remove --settings=settings.prod # Update everything changed in the last 2 hours. ./manage.py update_index --age=2 --settings=settings.prod # Update everything between Dec. 1, 2011 & Dec 31, 2011 ./manage.py update_index --start='2011-12-01T00:00:00' --end='2011-12-31T23:59:59' --settings=settings.prod # Update just a couple apps. ./manage.py update_index blog auth comments --settings=settings.prod # Update just a single model (in a complex app). ./manage.py update_index auth.User --settings=settings.prod # Crazy Go-Nuts University ./manage.py update_index events.Event media news.Story --start='2011-01-01T00:00:00 --remove --using=hotbackup --workers=12 --verbosity=2 --settings=settings.prod .. note:: This command *ONLY* updates records in the index. It does *NOT* handle deletions unless the ``--remove`` flag is provided. You might consider a queue consumer if the memory requirements for ``--remove`` don't fit your needs. Alternatively, you can use the ``RealtimeSignalProcessor``, which will automatically handle deletions. 
``rebuild_index`` ================= A shortcut for ``clear_index`` followed by ``update_index``. It accepts any/all of the arguments of the following arguments: ``--age``: Number of hours back to consider objects new. Useful for nightly reindexes (``--age=24``). Requires ``SearchIndexes`` to implement the ``get_updated_field`` method. ``--batch-size``: Number of items to index at once. Default is 1000. ``--site``: The site object to use when reindexing (like `search_sites.mysite`). ``--noinput``: If provided, the interactive prompts are skipped and the index is unceremoniously wiped out. ``--remove``: Remove objects from the index that are no longer present in the database. ``--verbosity``: If provided, dumps out more information about what's being done. * ``0`` = No output * ``1`` = Minimal output describing what models were indexed and how many records. * ``2`` = Full output, including everything from ``1`` plus output on each batch that is indexed, which is useful when debugging. ``--using``: Update only the named backend (can be used multiple times). By default, all backends will be updated. ``--nocommit``: If provided, it will pass commit=False to the backend. This means that the update will not become immediately visible and will depend on another explicit commit or the backend's commit strategy to complete the update. For when you really, really want a completely rebuilt index. ``build_solr_schema`` ===================== Once all of your ``SearchIndex`` classes are in place, this command can be used to generate the XML schema Solr needs to handle the search data. Generates a Solr schema and solrconfig file that reflects the indexes using templates under a Django template dir 'search_configuration/\*.xml'. If none are found, then provides defaults suitable for Solr 6.4. It accepts the following arguments: ``--filename``: If provided, renders schema.xml from the template directory directly to a file instead of stdout. 
Does not render solrconfig.xml ``--using``: Update only the named backend (can be used multiple times). By default all backends will be updated. ``--configure-directory``: If provided, attempts to configure a core located in the given directory by removing the ``managed-schema.xml`` (renaming if it exists), configuring the core by rendering the ``schema.xml`` and ``solrconfig.xml`` templates provided in the Django project's ``TEMPLATE_DIR/search_configuration`` directories. ``--reload-core``: If provided, attempts to automatically reload the solr core via the urls in the ``URL`` and ``ADMIN_URL`` settings of the Solr entry in ``HAYSTACK_CONNECTIONS``. Both *must* be provided. .. note:: ``build_solr_schema --configure-directory=`` can be used in isolation to drop configured files anywhere one might want for staging to one or more solr instances through arbitrary means. It will render all template files in the directory into the ``configure-directory`` ``build_solr_schema --configure-directory= --reload-core`` can be used together to reconfigure and reload a core located on a filesystem accessible to Django in a one-shot mechanism with no further requirements (assuming there are no errors in the template or configuration) .. note:: ``build_solr_schema`` uses templates to generate the output files. Haystack provides default templates for ``schema.xml`` and ``solrconfig.xml`` that are solr 6.5 compatible using some sensible defaults. If you would like to provide your own template, you will need to place it in ``search_configuration/`` inside a directory specified by your app's template directories settings. Examples:: /myproj/myapp/templates/search_configuration/schema.xml /myproj/myapp/templates/search_configuration/sorlconfig.xml /myproj/myapp/templates/search_configuration/otherfile.xml # ...or... /myproj/templates/search_configuration/schema.xml /myproj/templates/search_configuration/sorlconfig.xml /myproj/myapp/templates/search_configuration/otherfile.xml .. 
warning:: This command does NOT automatically update the ``schema.xml`` file for you all by itself. You must use --filename or --configure-directory to achieve this. ``haystack_info`` ================= Provides some basic information about how Haystack is setup and what models it is handling. It accepts no arguments. Useful when debugging or when using Haystack-enabled third-party apps. django-haystack-2.8.0/docs/migration_from_1_to_2.rst000066400000000000000000000236111325051407000224310ustar00rootroot00000000000000.. _ref-migration_from_1_to_2: =========================================== Migrating From Haystack 1.X to Haystack 2.X =========================================== Haystack introduced several backward-incompatible changes in the process of moving from the 1.X series to the 2.X series. These were done to clean up the API, to support new features & to clean up problems in 1.X. At a high level, they consisted of: * The removal of ``SearchSite`` & ``haystack.site``. * The removal of ``handle_registrations`` & ``autodiscover``. * The addition of multiple index support. * The addition of ``SignalProcessors`` & the removal of ``RealTimeSearchIndex``. * The removal/renaming of various settings. This guide will help you make the changes needed to be compatible with Haystack 2.X. Settings ======== Most prominently, the old way of specifying a backend & its settings has changed to support the multiple index feature. A complete Haystack 1.X example might look like:: HAYSTACK_SEARCH_ENGINE = 'solr' HAYSTACK_SOLR_URL = 'http://localhost:9001/solr/default' HAYSTACK_SOLR_TIMEOUT = 60 * 5 HAYSTACK_INCLUDE_SPELLING = True HAYSTACK_BATCH_SIZE = 100 # Or... HAYSTACK_SEARCH_ENGINE = 'whoosh' HAYSTACK_WHOOSH_PATH = '/home/search/whoosh_index' HAYSTACK_WHOOSH_STORAGE = 'file' HAYSTACK_WHOOSH_POST_LIMIT = 128 * 1024 * 1024 HAYSTACK_INCLUDE_SPELLING = True HAYSTACK_BATCH_SIZE = 100 # Or... 
HAYSTACK_SEARCH_ENGINE = 'xapian' HAYSTACK_XAPIAN_PATH = '/home/search/xapian_index' HAYSTACK_INCLUDE_SPELLING = True HAYSTACK_BATCH_SIZE = 100 In Haystack 2.X, you can now supply as many backends as you like, so all of the above settings can now be active at the same time. A translated set of settings would look like:: HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': 'http://localhost:9001/solr/default', 'TIMEOUT': 60 * 5, 'INCLUDE_SPELLING': True, 'BATCH_SIZE': 100, }, 'autocomplete': { 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', 'PATH': '/home/search/whoosh_index', 'STORAGE': 'file', 'POST_LIMIT': 128 * 1024 * 1024, 'INCLUDE_SPELLING': True, 'BATCH_SIZE': 100, }, 'slave': { 'ENGINE': 'xapian_backend.XapianEngine', 'PATH': '/home/search/xapian_index', 'INCLUDE_SPELLING': True, 'BATCH_SIZE': 100, }, } You are required to have at least one connection listed within ``HAYSTACK_CONNECTIONS``, it must be named ``default`` & it must have a valid ``ENGINE`` within it. Bare minimum looks like:: HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine' } } The key for each backend is an identifier you use to describe the backend within your app. You should refer to the :ref:`ref-multiple_index` documentation for more information on using the new multiple indexes & routing features. Also note that the ``ENGINE`` setting has changed from a lowercase "short name" of the engine to a full path to a new ``Engine`` class within the backend. Available options are: * ``haystack.backends.solr_backend.SolrEngine`` * ``haystack.backends.whoosh_backend.WhooshEngine`` * ``haystack.backends.simple_backend.SimpleEngine`` Additionally, the following settings were outright removed & will generate an exception if found: * ``HAYSTACK_SITECONF`` - Remove this setting & the file it pointed to. 
* ``HAYSTACK_ENABLE_REGISTRATIONS`` * ``HAYSTACK_INCLUDE_SPELLING`` Backends ======== The ``dummy`` backend was outright removed from Haystack, as it served very little use after the ``simple`` (pure-ORM-powered) backend was introduced. If you wrote a custom backend, please refer to the "Custom Backends" section below. Indexes ======= The other major changes affect the ``SearchIndex`` class. As the concept of ``haystack.site`` & ``SearchSite`` are gone, you'll need to modify your indexes. A Haystack 1.X index might've looked like:: import datetime from haystack.indexes import * from haystack import site from myapp.models import Note class NoteIndex(SearchIndex): text = CharField(document=True, use_template=True) author = CharField(model_attr='user') pub_date = DateTimeField(model_attr='pub_date') def get_queryset(self): """Used when the entire index for model is updated.""" return Note.objects.filter(pub_date__lte=datetime.datetime.now()) site.register(Note, NoteIndex) A converted Haystack 2.X index should look like:: import datetime from haystack import indexes from myapp.models import Note class NoteIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return Note def index_queryset(self, using=None): """Used when the entire index for model is updated.""" return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now()) Note the import on ``site`` & the registration statements are gone. Newly added are is the ``NoteIndex.get_model`` method. This is a **required** method & should simply return the ``Model`` class the index is for. There's also a new, additional class added to the ``class`` definition. The ``indexes.Indexable`` class is a simple mixin that serves to identify the classes Haystack should automatically discover & use. 
If you have a custom base class (say ``QueuedSearchIndex``) that other indexes inherit from, simply leave the ``indexes.Indexable`` off that declaration & Haystack won't try to use it. Additionally, the name of the ``document=True`` field is now enforced to be ``text`` across all indexes. If you need it named something else, you should set the ``HAYSTACK_DOCUMENT_FIELD`` setting. For example:: HAYSTACK_DOCUMENT_FIELD = 'pink_polka_dot' Finally, the ``index_queryset`` method should supplant the ``get_queryset`` method. This was present in the Haystack 1.2.X series (with a deprecation warning in 1.2.4+) but has been removed in Haystack v2. Finally, if you were unregistering other indexes before, you should make use of the new ``EXCLUDED_INDEXES`` setting available in each backend's settings. It should be a list of strings that contain the Python import path to the indexes that should not be loaded & used. For example:: HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': 'http://localhost:9001/solr/default', 'EXCLUDED_INDEXES': [ # Imagine that these indexes exist. They don't. 'django.contrib.auth.search_indexes.UserIndex', 'third_party_blog_app.search_indexes.EntryIndex', ] } } This allows for reliable swapping of the index that handles a model without relying on correct import order. Removal of ``RealTimeSearchIndex`` ================================== Use of the ``haystack.indexes.RealTimeSearchIndex`` is no longer valid. It has been removed in favor of ``RealtimeSignalProcessor``. To migrate, first change the inheritance of all your ``RealTimeSearchIndex`` subclasses to use ``SearchIndex`` instead:: # Old. class MySearchIndex(indexes.RealTimeSearchIndex, indexes.Indexable): # ... # New. class MySearchIndex(indexes.SearchIndex, indexes.Indexable): # ... Then update your settings to enable use of the ``RealtimeSignalProcessor``:: HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' Done! 
===== For most basic uses of Haystack, this is all that is necessary to work with Haystack 2.X. You should rebuild your index if needed & test your new setup. Advanced Uses ============= Swapping Backend ---------------- If you were manually swapping the ``SearchQuery`` or ``SearchBackend`` being used by ``SearchQuerySet`` in the past, it's now preferable to simply setup another connection & use the ``SearchQuerySet.using`` method to select that connection instead. Also, if you were manually instantiating ``SearchBackend`` or ``SearchQuery``, it's now preferable to rely on the connection's engine to return the right thing. For example:: from haystack import connections backend = connections['default'].get_backend() query = connections['default'].get_query() Custom Backends --------------- If you had written a custom ``SearchBackend`` and/or custom ``SearchQuery``, there's a little more work needed to be Haystack 2.X compatible. You should, but don't have to, rename your ``SearchBackend`` & ``SearchQuery`` classes to be more descriptive/less collide-y. For example, ``solr_backend.SearchBackend`` became ``solr_backend.SolrSearchBackend``. This prevents non-namespaced imports from stomping on each other. You need to add a new class to your backend, subclassing ``BaseEngine``. This allows specifying what ``backend`` & ``query`` should be used on a connection with less duplication/naming trickery. It goes at the bottom of the file (so that the classes are defined above it) and should look like:: from haystack.backends import BaseEngine from haystack.backends.solr_backend import SolrSearchQuery # Code then... class MyCustomSolrEngine(BaseEngine): # Use our custom backend. backend = MySolrBackend # Use the built-in Solr query. query = SolrSearchQuery Your ``HAYSTACK_CONNECTIONS['default']['ENGINE']`` should then point to the full Python import path to your new ``BaseEngine`` subclass. 
Finally, you will likely have to adjust the ``SearchBackend.__init__`` & ``SearchQuery.__init__``, as they have changed significantly. Please refer to the commits for those backends. django-haystack-2.8.0/docs/multiple_index.rst000066400000000000000000000162421325051407000212760ustar00rootroot00000000000000.. _ref-multiple_index: ================ Multiple Indexes ================ Much like Django's `multiple database support`_, Haystack has "multiple index" support. This allows you to talk to several different engines at the same time. It enables things like master-slave setups, multiple language indexing, separate indexes for general search & autocomplete as well as other options. .. _`multiple database support`: http://docs.djangoproject.com/en/1.3/topics/db/multi-db/ Specifying Available Connections ================================ You can supply as many backends as you like, each with a descriptive name. A complete setup that accesses all backends might look like:: HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': 'http://localhost:9001/solr/default', 'TIMEOUT': 60 * 5, 'INCLUDE_SPELLING': True, 'BATCH_SIZE': 100, 'SILENTLY_FAIL': True, }, 'autocomplete': { 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', 'PATH': '/home/search/whoosh_index', 'STORAGE': 'file', 'POST_LIMIT': 128 * 1024 * 1024, 'INCLUDE_SPELLING': True, 'BATCH_SIZE': 100, 'SILENTLY_FAIL': True, }, 'slave': { 'ENGINE': 'xapian_backend.XapianEngine', 'PATH': '/home/search/xapian_index', 'INCLUDE_SPELLING': True, 'BATCH_SIZE': 100, 'SILENTLY_FAIL': True, }, 'db': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', 'SILENTLY_FAIL': True, } } You are required to have at least one connection listed within ``HAYSTACK_CONNECTIONS``, it must be named ``default`` & it must have a valid ``ENGINE`` within it. Management Commands =================== All management commands that manipulate data use **ONLY** one connection at a time. 
By default, they use the ``default`` index but accept a ``--using`` flag to specify a different connection. For example:: ./manage.py rebuild_index --noinput --using=whoosh Automatic Routing ================= To make the selection of the correct index easier, Haystack (like Django) has the concept of "routers". All provided routers are checked whenever a read or write happens, in the order in which they are defined. For read operations (when a search query is executed), the ``for_read`` method of each router is called, until one of them returns an index, which is used for the read operation. For write operations (when a delete or update is executed), the ``for_write`` method of each router is called, and the results are aggregated. All of the indexes that were returned are then updated. Haystack ships with a ``DefaultRouter`` enabled. It looks like:: class DefaultRouter(BaseRouter): def for_read(self, **hints): return DEFAULT_ALIAS def for_write(self, **hints): return DEFAULT_ALIAS This means that the default index is used for all read and write operations. If the ``for_read`` or ``for_write`` method doesn't exist or returns ``None``, that indicates that the current router can't handle the data. The next router is then checked. The ``for_write`` method can return either a single string representing an index name, or an iterable of such index names. For example:: class UpdateEverythingRouter(BaseRouter): def for_write(self, **hints): return ('myindex1', 'myindex2') The ``hints`` passed can be anything that helps the router make a decision. This data should always be considered optional & be guarded against. At current, ``for_write`` receives an ``index`` option (pointing to the ``SearchIndex`` calling it) while ``for_read`` may receive ``models`` (being a list of ``Model`` classes the ``SearchQuerySet`` may be looking at). You may provide as many routers as you like by overriding the ``HAYSTACK_ROUTERS`` setting. 
For example:: HAYSTACK_ROUTERS = ['myapp.routers.MasterRouter', 'myapp.routers.SlaveRouter', 'haystack.routers.DefaultRouter'] Master-Slave Example -------------------- The ``MasterRouter`` & ``SlaveRouter`` might look like:: from haystack import routers class MasterRouter(routers.BaseRouter): def for_write(self, **hints): return 'master' def for_read(self, **hints): return None class SlaveRouter(routers.BaseRouter): def for_write(self, **hints): return None def for_read(self, **hints): return 'slave' The observant might notice that since the methods don't overlap, this could be combined into one ``Router`` like so:: from haystack import routers class MasterSlaveRouter(routers.BaseRouter): def for_write(self, **hints): return 'master' def for_read(self, **hints): return 'slave' Manually Selecting ================== There may be times when automatic selection of the correct index is undesirable, such as when fixing erroneous data in an index or when you know exactly where data should be located. For this, the ``SearchQuerySet`` class allows for manually selecting the index via the ``SearchQuerySet.using`` method:: from haystack.query import SearchQuerySet # Uses the routers' opinion. sqs = SearchQuerySet().auto_query('banana') # Forces the default. sqs = SearchQuerySet().using('default').auto_query('banana') # Forces the slave connection (presuming it was setup). sqs = SearchQuerySet().using('slave').auto_query('banana') .. warning:: Note that the models a ``SearchQuerySet`` is trying to pull from must all come from the same index. Haystack is not able to combine search queries against different indexes. Custom Index Selection ====================== If a specific backend has been selected, the ``SearchIndex.index_queryset`` and ``SearchIndex.read_queryset`` will receive the backend name, giving indexes the opportunity to customize the returned queryset. 
For example, a multi-lingual site using Solr can use `multiple cores <http://wiki.apache.org/solr/CoreAdmin>`_ and corresponding Haystack backends using the language name. Under this scenario, queries are simple:: sqs = SearchQuerySet().using(lang).auto_query(…)
haystack-rqueue --------------- https://github.com/mandx/haystack-rqueue (2.X compatible) Also provides a queue-based setup, this time centered around RQ. Useful for keeping the index fresh using ``./manage.py rqworker``. django-celery-haystack ---------------------- https://github.com/mixcloud/django-celery-haystack-SearchIndex Another queue-based setup, also around Celery. Useful for keeping the index fresh. saved_searches -------------- http://github.com/toastdriven/saved_searches (2.X compatible) Adds personalization to search. Retains a history of queries run by the various users on the site (including anonymous users). This can be used to present the user with their search history and provide most popular/most recent queries on the site. saved-search ------------ https://github.com/DirectEmployers/saved-search An alternate take on persisting user searches, this has a stronger focus on locale-based searches as well as further integration. haystack-static-pages --------------------- http://github.com/trapeze/haystack-static-pages Provides a simple way to index flat (non-model-based) content on your site. By using the management command that comes with it, it can crawl all pertinent pages on your site and add them to search. django-tumbleweed ----------------- http://github.com/mcroydon/django-tumbleweed Provides a tumblelog-like view to any/all Haystack-enabled models on your site. Useful for presenting date-based views of search data. Attempts to avoid the database completely where possible. Haystack-Enabled Apps ===================== These are reusable apps that ship with ``SearchIndexes``, suitable for quick integration with Haystack. * django-faq (freq. 
Virtually all tests pass under both Python 2 & 3, with a small number of expected failures under Python 3 (typically related to ordering, see below).
Haystack took the approach of abandoning making assertions about the entire structure. Instead, we either simply assert that the new object contains the right things or make a call to ``sorted(...)`` around it to ensure order. It is recommended you take a similar approach. .. _`hash randomization`: http://docs.python.org/3/whatsnew/3.3.html#builtin-functions-and-types django-haystack-2.8.0/docs/rich_content_extraction.rst000066400000000000000000000051221325051407000231660ustar00rootroot00000000000000.. _ref-rich_content_extraction: ======================= Rich Content Extraction ======================= For some projects it is desirable to index text content which is stored in structured files such as PDFs, Microsoft Office documents, images, etc. Currently only Solr's `ExtractingRequestHandler`_ is directly supported by Haystack but the approach below could be used with any backend which supports this feature. .. _`ExtractingRequestHandler`: http://wiki.apache.org/solr/ExtractingRequestHandler Extracting Content ================== :meth:`SearchBackend.extract_file_contents` accepts a file or file-like object and returns a dictionary containing two keys: ``metadata`` and ``contents``. The ``contents`` value will be a string containing all of the text which the backend managed to extract from the file contents. ``metadata`` will always be a dictionary but the keys and values will vary based on the underlying extraction engine and the type of file provided. Indexing Extracted Content ========================== Generally you will want to include the extracted text in your main document field along with everything else specified in your search template. 
This example shows how to override a hypothetical ``FileIndex``'s ``prepare`` method to include the extracted content along with information retrieved from the database::
``Haystack`` is maintained with all tests passing at all times, so if you receive any errors during testing, please check your setup and file a report if the errors persist. To run just a portion of the tests you can use the script ``run_tests.py`` and just specify the files or directories you wish to run, for example:: cd test_haystack ./run_tests.py whoosh_tests test_loading.py The ``run_tests.py`` script is just a tiny wrapper around the nose_ library and any options you pass to it will be passed on; including ``--help`` to get a list of possible options:: cd test_haystack ./run_tests.py --help .. _nose: https://nose.readthedocs.io/en/latest/ Configuring Solr ================ Haystack assumes that you have a Solr server running on port ``9001`` which uses the schema and configuration provided in the ``test_haystack/solr_tests/server/`` directory. For convenience, a script is provided which will download, configure and start a test Solr server:: test_haystack/solr_tests/server/start-solr-test-server.sh If no server is found all solr-related tests will be skipped. Configuring Elasticsearch ========================= The test suite will try to connect to Elasticsearch on port ``9200``. If no server is found all elasticsearch tests will be skipped. Note that the tests are destructive - during the teardown phase they will wipe the cluster clean so make sure you don't run them against an instance with data you wish to keep. If you want to run the geo-django tests you may need to review the `GeoDjango GEOS and GDAL settings`_ before running these commands:: cd test_haystack ./run_tests.py elasticsearch_tests .. _GeoDjango GEOS and GDAL settings: https://docs.djangoproject.com/en/1.7/ref/contrib/gis/install/geolibs/#geos-library-path django-haystack-2.8.0/docs/searchbackend_api.rst000066400000000000000000000074151325051407000216640ustar00rootroot00000000000000.. _ref-searchbackend-api: ===================== ``SearchBackend`` API ===================== .. 
class:: SearchBackend(connection_alias, **connection_options) The ``SearchBackend`` class handles interaction directly with the backend. The search query it performs is usually fed to it from a ``SearchQuery`` class that has been built for that backend. This class must be at least partially implemented on a per-backend basis and is usually accompanied by a ``SearchQuery`` class within the same module. Unless you are writing a new backend, it is unlikely you need to directly access this class. Method Reference ================ ``update`` ---------- .. method:: SearchBackend.update(self, index, iterable) Updates the backend when given a ``SearchIndex`` and a collection of documents. This method MUST be implemented by each backend, as it will be highly specific to each one. ``remove`` ---------- .. method:: SearchBackend.remove(self, obj_or_string) Removes a document/object from the backend. Can be either a model instance or the identifier (i.e. ``app_name.model_name.id``) in the event the object no longer exists. This method MUST be implemented by each backend, as it will be highly specific to each one. ``clear`` --------- .. method:: SearchBackend.clear(self, models=[]) Clears the backend of all documents/objects for a collection of models. This method MUST be implemented by each backend, as it will be highly specific to each one. ``search`` ---------- .. method:: SearchBackend.search(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None, limit_to_registered_models=None, result_class=None, **kwargs) Takes a query to search on and returns a dictionary. The query should be a string that is appropriate syntax for the backend. The returned dictionary should contain the keys 'results' and 'hits'. The 'results' value should be an iterable of populated ``SearchResult`` objects. 
The 'hits' should be an integer count of the number of matched results the search backend found. This method MUST be implemented by each backend, as it will be highly specific to each one. ``extract_file_contents`` ------------------------- .. method:: SearchBackend.extract_file_contents(self, file_obj) Perform text extraction on the provided file or file-like object. Returns either None or a dictionary containing the keys ``contents`` and ``metadata``. The ``contents`` field will always contain the extracted text content returned by the underlying search engine but ``metadata`` may vary considerably based on the backend and the input file. ``prep_value`` -------------- .. method:: SearchBackend.prep_value(self, value) Hook to give the backend a chance to prep an attribute value before sending it to the search engine. By default, just force it to unicode. ``more_like_this`` ------------------ .. method:: SearchBackend.more_like_this(self, model_instance, additional_query_string=None, result_class=None) Takes a model object and returns results the backend thinks are similar. This method MUST be implemented by each backend, as it will be highly specific to each one. ``build_schema`` ---------------- .. method:: SearchBackend.build_schema(self, fields) Takes a dictionary of fields and returns schema information. This method MUST be implemented by each backend, as it will be highly specific to each one. ``build_models_list`` --------------------- .. method:: SearchBackend.build_models_list(self) Builds a list of models for searching. The ``search`` method should use this and the ``django_ct`` field to narrow the results (unless the user indicates not to). This helps ignore any results that are not currently handled models and ensures consistent caching. django-haystack-2.8.0/docs/searchfield_api.rst000066400000000000000000000164031325051407000213550ustar00rootroot00000000000000.. _ref-searchfield-api: =================== ``SearchField`` API =================== .. 
class:: SearchField The ``SearchField`` and its subclasses provides a way to declare what data you're interested in indexing. They are used with ``SearchIndexes``, much like ``forms.*Field`` are used within forms or ``models.*Field`` within models. They provide both the means for storing data in the index, as well as preparing the data before it's placed in the index. Haystack uses all fields from all ``SearchIndex`` classes to determine what the engine's index schema ought to look like. In practice, you'll likely never actually use the base ``SearchField``, as the subclasses are much better at handling real data. Subclasses ========== Included with Haystack are the following field types: * ``BooleanField`` * ``CharField`` * ``DateField`` * ``DateTimeField`` * ``DecimalField`` * ``EdgeNgramField`` * ``FloatField`` * ``IntegerField`` * ``LocationField`` * ``MultiValueField`` * ``NgramField`` And equivalent faceted versions: * ``FacetBooleanField`` * ``FacetCharField`` * ``FacetDateField`` * ``FacetDateTimeField`` * ``FacetDecimalField`` * ``FacetFloatField`` * ``FacetIntegerField`` * ``FacetMultiValueField`` .. note:: There is no faceted variant of the n-gram fields. Because of how the engine generates n-grams, faceting on these field types (``NgramField`` & ``EdgeNgram``) would make very little sense. Usage ===== While ``SearchField`` objects can be used on their own, they're generally used within a ``SearchIndex``. You use them in a declarative manner, just like fields in ``django.forms.Form`` or ``django.db.models.Model`` objects. 
For example:: from haystack import indexes from myapp.models import Note class NoteIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return Note This will hook up those fields with the index and, when updating a ``Model`` object, pull the relevant data out and prepare it for storage in the index. Field Options ============= ``default`` ----------- .. attribute:: SearchField.default Provides a means for specifying a fallback value in the event that no data is found for the field. Can be either a value or a callable. ``document`` ------------ .. attribute:: SearchField.document A boolean flag that indicates which of the fields in the ``SearchIndex`` ought to be the primary field for searching within. Default is ``False``. .. note:: Only one field can be marked as the ``document=True`` field, so you should standardize this name and the format of the field between all of your ``SearchIndex`` classes. ``indexed`` ----------- .. attribute:: SearchField.indexed A boolean flag for indicating whether or not the data from this field will be searchable within the index. Default is ``True``. The companion of this option is ``stored``. ``index_fieldname`` ------------------- .. attribute:: SearchField.index_fieldname The ``index_fieldname`` option allows you to force the name of the field in the index. This does not change how Haystack refers to the field. This is useful when using Solr's dynamic attributes or when integrating with other external software. Default is variable name of the field within the ``SearchIndex``. ``model_attr`` -------------- .. attribute:: SearchField.model_attr The ``model_attr`` option is a shortcut for preparing data. Rather than having to manually fetch data out of a ``Model``, ``model_attr`` allows you to specify a string that will automatically pull data out for you. 
For example:: # Automatically looks within the model and populates the field with # the ``last_name`` attribute. author = CharField(model_attr='last_name') It also handles callables:: # On a ``User`` object, pulls the full name as pieced together by the # ``get_full_name`` method. author = CharField(model_attr='get_full_name') And can look through relations:: # Pulls the ``bio`` field from a ``UserProfile`` object that has a # ``OneToOneField`` relationship to a ``User`` object. biography = CharField(model_attr='user__profile__bio') ``null`` -------- .. attribute:: SearchField.null A boolean flag for indicating whether or not it's permissible for the field not to contain any data. Default is ``False``. .. note:: Unlike Django's database layer, which injects a ``NULL`` into the database when a field is marked nullable, ``null=True`` will actually exclude that field from being included with the document. This is more efficient for the search engine to deal with. ``stored`` ---------- .. attribute:: SearchField.stored A boolean flag for indicating whether or not the data from this field will be stored within the index. Default is ``True``. This is useful for pulling data out of the index along with the search result in order to save on hits to the database. The companion of this option is ``indexed``. ``template_name`` ----------------- .. attribute:: SearchField.template_name Allows you to override the name of the template to use when preparing data. By default, the data templates for fields are located within your ``TEMPLATE_DIRS`` under a path like ``search/indexes/{app_label}/{model_name}_{field_name}.txt``. This option lets you override that path (though still within ``TEMPLATE_DIRS``). Example:: bio = CharField(use_template=True, template_name='myapp/data/bio.txt') You can also provide a list of templates, as ``loader.select_template`` is used under the hood. 
Example:: bio = CharField(use_template=True, template_name=['myapp/data/bio.txt', 'myapp/bio.txt', 'bio.txt']) ``use_template`` ---------------- .. attribute:: SearchField.use_template A boolean flag for indicating whether or not a field should prepare its data via a data template or not. Default is False. Data templates are extremely useful, as they let you easily tie together different parts of the ``Model`` (and potentially related models). This leads to better search results with very little effort. Method Reference ================ ``__init__`` ------------ .. method:: SearchField.__init__(self, model_attr=None, use_template=False, template_name=None, document=False, indexed=True, stored=True, faceted=False, default=NOT_PROVIDED, null=False, index_fieldname=None, facet_class=None, boost=1.0, weight=None) Instantiates a fresh ``SearchField`` instance. ``has_default`` --------------- .. method:: SearchField.has_default(self) Returns a boolean of whether this field has a default value. ``prepare`` ----------- .. method:: SearchField.prepare(self, obj) Takes data from the provided object and prepares it for storage in the index. ``prepare_template`` -------------------- .. method:: SearchField.prepare_template(self, obj) Flattens an object for indexing. This loads a template (``search/indexes/{app_label}/{model_name}_{field_name}.txt``) and returns the result of rendering that template. ``object`` will be in its context. ``convert`` ----------- .. method:: SearchField.convert(self, value) Handles conversion between the data found and the type of the field. Extending classes should override this method and provide correct data coercion. django-haystack-2.8.0/docs/searchindex_api.rst000066400000000000000000000536361325051407000214120ustar00rootroot00000000000000.. _ref-searchindex-api: =================== ``SearchIndex`` API =================== .. 
class:: SearchIndex() The ``SearchIndex`` class allows the application developer a way to provide data to the backend in a structured format. Developers familiar with Django's ``Form`` or ``Model`` classes should find the syntax for indexes familiar. This class is arguably the most important part of integrating Haystack into your application, as it has a large impact on the quality of the search results and how easy it is for users to find what they're looking for. Care and effort should be put into making your indexes the best they can be. Quick Start =========== For the impatient:: import datetime from haystack import indexes from myapp.models import Note class NoteIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return Note def index_queryset(self, using=None): "Used when the entire index for model is updated." return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now()) Background ========== Unlike relational databases, most search engines supported by Haystack are primarily document-based. They focus on a single text blob which they tokenize, analyze and index. When searching, this field is usually the primary one that is searched. Further, the schema used by most engines is the same for all types of data added, unlike a relational database that has a table schema for each chunk of data. It may be helpful to think of your search index as something closer to a key-value store instead of imagining it in terms of a RDBMS. Why Create Fields? ------------------ Despite being primarily document-driven, most search engines also support the ability to associate other relevant data with the indexed document. These attributes can be mapped through the use of fields within Haystack. 
Common uses include storing pertinent data information, categorizations of the document, author information and related data. By adding fields for these pieces of data, you provide a means to further narrow/filter search terms. This can be useful from either a UI perspective (a better advanced search form) or from a developer standpoint (section-dependent search, off-loading certain tasks to search, et cetera). .. warning:: Haystack reserves the following field names for internal use: ``id``, ``django_ct``, ``django_id`` & ``content``. The ``name`` & ``type`` names used to be reserved but no longer are. You can override these field names using the ``HAYSTACK_ID_FIELD``, ``HAYSTACK_DJANGO_CT_FIELD`` & ``HAYSTACK_DJANGO_ID_FIELD`` if needed. Significance Of ``document=True`` --------------------------------- Most search engines that were candidates for inclusion in Haystack all had a central concept of a document that they indexed. These documents form a corpus within which to primarily search. Because this ideal is so central and most of Haystack is designed to have pluggable backends, it is important to ensure that all engines have at least a bare minimum of the data they need to function. As a result, when creating a ``SearchIndex``, one (and only one) field must be marked with ``document=True``. This signifies to Haystack that whatever is placed in this field while indexing is to be the primary text the search engine indexes. The name of this field can be almost anything, but ``text`` is one of the more common names used. Stored/Indexed Fields --------------------- One shortcoming of the use of search is that you rarely have all or the most up-to-date information about an object in the index. As a result, when retrieving search results, you will likely have to access the object in the database to provide better information. However, this can also hit the database quite heavily (think ``.get(pk=result.id)`` per object). 
If your search is popular, this can lead to a big performance hit. There are two ways to prevent this. The first way is ``SearchQuerySet.load_all``, which tries to group all similar objects and pull them through one query instead of many. This still hits the DB and incurs a performance penalty. The other option is to leverage stored fields. By default, all fields in Haystack are either indexed (searchable by the engine) or stored (retained by the engine and presented in the results). By using a stored field, you can store commonly used data in such a way that you don't need to hit the database when processing the search result to get more information. For example, one great way to leverage this is to pre-render an object's search result template DURING indexing. You define an additional field, render a template with it and it follows the main indexed record into the index. Then, when that record is pulled when it matches a query, you can simply display the contents of that field, which avoids the database hit.: Within ``myapp/search_indexes.py``:: class NoteIndex(SearchIndex, indexes.Indexable): text = CharField(document=True, use_template=True) author = CharField(model_attr='user') pub_date = DateTimeField(model_attr='pub_date') # Define the additional field. rendered = CharField(use_template=True, indexed=False) Then, inside a template named ``search/indexes/myapp/note_rendered.txt``::

{{ object.title }}

{{ object.content }}

And finally, in ``search/search.html``:: ... {% for result in page.object_list %}
{{ result.rendered|safe }}
{% endfor %} Keeping The Index Fresh ======================= There are several approaches to keeping the search index in sync with your database. None are more correct than the others and which one you use depends on the traffic you see, the churn rate of your data, and what concerns are important to you (CPU load, how recent, et cetera). The conventional method is to use ``SearchIndex`` in combination with cron jobs. Running a ``./manage.py update_index`` every couple hours will keep your data in sync within that timeframe and will handle the updates in a very efficient batch. Additionally, Whoosh (and to a lesser extent Xapian) behaves better when using this approach. Another option is to use ``RealtimeSignalProcessor``, which uses Django's signals to immediately update the index any time a model is saved/deleted. This yields a much more current search index at the expense of being fairly inefficient. Solr & Elasticsearch are the only backends that handles this well under load, and even then, you should make sure you have the server capacity to spare. A third option is to develop a custom ``QueuedSignalProcessor`` that, much like ``RealtimeSignalProcessor``, uses Django's signals to enqueue messages for updates/deletes. Then writing a management command to consume these messages in batches, yielding a nice compromise between the previous two options. For more information see :doc:`signal_processors`. .. note:: Haystack doesn't ship with a ``QueuedSignalProcessor`` largely because there is such a diversity of lightweight queuing options and that they tend to polarize developers. Queuing is outside of Haystack's goals (provide good, powerful search) and, as such, is left to the developer. Additionally, the implementation is relatively trivial & there are already good third-party add-ons for Haystack to enable this. 
Advanced Data Preparation ========================= In most cases, using the `model_attr` parameter on your fields allows you to easily get data from a Django model to the document in your index, as it handles both direct attribute access as well as callable functions within your model. .. note:: The ``model_attr`` keyword argument also can look through relations in models. So you can do something like ``model_attr='author__first_name'`` to pull just the first name of the author, similar to some lookups used by Django's ORM. However, sometimes, even more control over what gets placed in your index is needed. To facilitate this, ``SearchIndex`` objects have a 'preparation' stage that populates data just before it is indexed. You can hook into this phase in several ways. This should be very familiar to developers who have used Django's ``forms`` before as it loosely follows similar concepts, though the emphasis here is less on cleansing data from user input and more on making the data friendly to the search backend. 1. ``prepare_FOO(self, object)`` -------------------------------- The most common way to affect a single field's data is to create a ``prepare_FOO`` method (where FOO is the name of the field). As a parameter to this method, you will receive the instance that is attempting to be indexed. .. note:: This method is analogous to Django's ``Form.clean_FOO`` methods. To keep with our existing example, one use case might be altering the name inside the ``author`` field to be "firstname lastname ". In this case, you might write the following code:: class NoteIndex(SearchIndex, indexes.Indexable): text = CharField(document=True, use_template=True) author = CharField(model_attr='user') pub_date = DateTimeField(model_attr='pub_date') def get_model(self): return Note def prepare_author(self, obj): return "%s <%s>" % (obj.user.get_full_name(), obj.user.email) This method should return a single value (or list/tuple/dict) to populate that field's data upon indexing. 
This method is fully functional with ``model_attr``, so if there's no convenient way to access the data you want, this is an excellent way to prepare it::
An example might look like:: class NoteIndex(SearchIndex, indexes.Indexable): text = CharField(document=True, use_template=True) author = CharField(model_attr='user') pub_date = DateTimeField(model_attr='pub_date') def get_model(self): return Note def prepare(self, object): self.prepared_data = super(NoteIndex, self).prepare(object) # Add in tags (assuming there's a M2M relationship to Tag on the model). # Note that this would NOT get picked up by the automatic # schema tools provided by Haystack. self.prepared_data['tags'] = [tag.name for tag in object.tags.all()] return self.prepared_data If you choose to use this method, you should make a point to be careful to call the ``super()`` method before altering the data. Without doing so, you may have an incomplete set of data populating your indexes. This method has the final say in all data, overriding both what the fields provide as well as any ``prepare_FOO`` methods on the class. .. note:: This method is roughly analogous to Django's ``Form.full_clean`` and ``Form.clean`` methods. However, unlike these methods, it is not fired as the result of trying to access ``self.prepared_data``. It requires an explicit call. 3. Overriding ``prepare(self, object)`` On Individual ``SearchField`` Objects ----------------------------------------------------------------------------- The final way to manipulate your data is to implement a custom ``SearchField`` object and write its ``prepare`` method to populate/alter the data any way you choose. For instance, a (naive) user-created ``GeoPointField`` might look something like:: from django.utils import six from haystack import indexes class GeoPointField(indexes.CharField): def __init__(self, **kwargs): kwargs['default'] = '0.00-0.00' super(GeoPointField, self).__init__(**kwargs) def prepare(self, obj): return six.text_type("%s-%s" % (obj.latitude, obj.longitude)) The ``prepare`` method simply returns the value to be used for that field. 
It's entirely possible to include data that's not directly referenced to the object here, depending on your needs. Note that this is NOT a recommended approach to storing geographic data in a search engine (there is no formal suggestion on this as support is usually non-existent), merely an example of how to extend existing fields. .. note:: This method is analogous to Django's ``Field.clean`` methods. Adding New Fields ================= If you have an existing ``SearchIndex`` and you add a new field to it, Haystack will add this new data on any updates it sees after that point. However, this will not populate the existing data you already have. In order for the data to be picked up, you will need to run ``./manage.py rebuild_index``. This will cause all backends to rebuild the existing data already present in the quickest and most efficient way. .. note:: With the Solr backend, you'll also have to add to the appropriate ``schema.xml`` for your configuration before running the ``rebuild_index``. ``Search Index`` ================ ``get_model`` ------------- .. method:: SearchIndex.get_model(self) Should return the ``Model`` class (not an instance) that the rest of the ``SearchIndex`` should use. This method is required & you must override it to return the correct class. ``index_queryset`` ------------------ .. method:: SearchIndex.index_queryset(self, using=None) Get the default QuerySet to index when doing a full update. Subclasses can override this method to avoid indexing certain objects. ``read_queryset`` ----------------- .. method:: SearchIndex.read_queryset(self, using=None) Get the default QuerySet for read actions. Subclasses can override this method to work with other managers. Useful when working with default managers that filter some objects. ``build_queryset`` ------------------- .. method:: SearchIndex.build_queryset(self, start_date=None, end_date=None) Get the default QuerySet to index when doing an index update.
Subclasses can override this method to take into account related model modification times. The default is to use ``SearchIndex.index_queryset`` and filter based on ``SearchIndex.get_updated_field`` ``prepare`` ----------- .. method:: SearchIndex.prepare(self, obj) Fetches and adds/alters data before indexing. ``get_content_field`` --------------------- .. method:: SearchIndex.get_content_field(self) Returns the field that supplies the primary document to be indexed. ``update`` ---------- .. method:: SearchIndex.update(self, using=None) Updates the entire index. If ``using`` is provided, it specifies which connection should be used. Default relies on the routers to decide which backend should be used. ``update_object`` ----------------- .. method:: SearchIndex.update_object(self, instance, using=None, **kwargs) Update the index for a single object. Attached to the class's post-save hook. If ``using`` is provided, it specifies which connection should be used. Default relies on the routers to decide which backend should be used. ``remove_object`` ----------------- .. method:: SearchIndex.remove_object(self, instance, using=None, **kwargs) Remove an object from the index. Attached to the class's post-delete hook. If ``using`` is provided, it specifies which connection should be used. Default relies on the routers to decide which backend should be used. ``clear`` --------- .. method:: SearchIndex.clear(self, using=None) Clears the entire index. If ``using`` is provided, it specifies which connection should be used. Default relies on the routers to decide which backend should be used. ``reindex`` ----------- .. method:: SearchIndex.reindex(self, using=None) Completely clears the index for this model and rebuilds it. If ``using`` is provided, it specifies which connection should be used. Default relies on the routers to decide which backend should be used. ``get_updated_field`` --------------------- .. 
method:: SearchIndex.get_updated_field(self) Get the field name that represents the updated date for the model. If specified, this is used by the reindex command to filter out results from the ``QuerySet``, enabling you to reindex only recent records. This method should either return None (reindex everything always) or a string of the ``Model``'s ``DateField``/``DateTimeField`` name. ``should_update`` ----------------- .. method:: SearchIndex.should_update(self, instance, **kwargs) Determine if an object should be updated in the index. It's useful to override this when an object may save frequently and cause excessive reindexing. You should check conditions on the instance and return False if it is not to be indexed. The ``kwargs`` passed along to this method can be the same as the ones passed by Django when a Model is saved/deleted, so it's possible to check if the object has been created or not. See ``django.db.models.signals.post_save`` for details on what is passed. By default, returns True (always reindex). ``load_all_queryset`` --------------------- .. method:: SearchIndex.load_all_queryset(self) Provides the ability to override how objects get loaded in conjunction with ``RelatedSearchQuerySet.load_all``. This is useful for post-processing the results from the query, enabling things like adding ``select_related`` or filtering certain data. .. warning:: Utilizing this functionality can have negative performance implications. Please see the section on ``RelatedSearchQuerySet`` within :doc:`searchqueryset_api` for further information. By default, returns ``all()`` on the model's default manager. Example:: class NoteIndex(SearchIndex, indexes.Indexable): text = CharField(document=True, use_template=True) author = CharField(model_attr='user') pub_date = DateTimeField(model_attr='pub_date') def get_model(self): return Note def load_all_queryset(self): # Pull all objects related to the Note in search results.
return Note.objects.all().select_related() When searching, the ``RelatedSearchQuerySet`` appends on a call to ``in_bulk``, so be sure that the ``QuerySet`` you provide can accommodate this and that the ids passed to ``in_bulk`` will map to the model in question. If you need a specific ``QuerySet`` in one place, you can specify this at the ``RelatedSearchQuerySet`` level using the ``load_all_queryset`` method. See :doc:`searchqueryset_api` for usage. ``ModelSearchIndex`` ==================== The ``ModelSearchIndex`` class allows for automatic generation of a ``SearchIndex`` based on the fields of the model assigned to it. With the exception of the automated introspection, it is a ``SearchIndex`` class, so all notes above pertaining to ``SearchIndexes`` apply. As with the ``ModelForm`` class in Django, it employs an inner class called ``Meta``, which should contain a ``model`` attribute. By default all non-relational model fields are included as search fields on the index, but fields can be restricted by way of a ``fields`` whitelist, or excluded with an ``excludes`` list, to prevent certain fields from appearing in the class. In addition, it adds a `text` field that is the ``document=True`` field and has `use_template=True` option set, just like the ``BasicSearchIndex``. .. warning:: Usage of this class might result in inferior ``SearchIndex`` objects, which can directly affect your search results. Use this to establish basic functionality and move to custom `SearchIndex` objects for better control. At this time, it does not handle related fields. 
Quick Start ----------- For the impatient:: import datetime from haystack import indexes from myapp.models import Note # All Fields class AllNoteIndex(indexes.ModelSearchIndex, indexes.Indexable): class Meta: model = Note # Blacklisted Fields class LimitedNoteIndex(indexes.ModelSearchIndex, indexes.Indexable): class Meta: model = Note excludes = ['user'] # Whitelisted Fields class NoteIndex(indexes.ModelSearchIndex, indexes.Indexable): class Meta: model = Note fields = ['user', 'pub_date'] # Note that regular ``SearchIndex`` methods apply. def index_queryset(self, using=None): "Used when the entire index for model is updated." return Note.objects.filter(pub_date__lte=datetime.datetime.now()) django-haystack-2.8.0/docs/searchquery_api.rst000066400000000000000000000212351325051407000214360ustar00rootroot00000000000000.. _ref-searchquery-api: =================== ``SearchQuery`` API =================== .. class:: SearchQuery(using=DEFAULT_ALIAS) The ``SearchQuery`` class acts as an intermediary between ``SearchQuerySet``'s abstraction and ``SearchBackend``'s actual search. Given the metadata provided by ``SearchQuerySet``, ``SearchQuery`` builds the actual query and interacts with the ``SearchBackend`` on ``SearchQuerySet``'s behalf. This class must be at least partially implemented on a per-backend basis, as portions are highly specific to the backend. It usually is bundled with the accompanying ``SearchBackend``. Most people will **NOT** have to use this class directly. ``SearchQuerySet`` handles all interactions with ``SearchQuery`` objects and provides a nicer interface to work with. Should you need advanced/custom behavior, you can supply your version of ``SearchQuery`` that overrides/extends the class in the manner you see fit. You can either hook it up in a ``BaseEngine`` subclass or ``SearchQuerySet`` objects take a kwarg parameter ``query`` where you can pass in your class. 
``SQ`` Objects ============== For expressing more complex queries, especially involving AND/OR/NOT in different combinations, you should use ``SQ`` objects. Like ``django.db.models.Q`` objects, ``SQ`` objects can be passed to ``SearchQuerySet.filter`` and use the familiar unary operators (``&``, ``|`` and ``~``) to generate complex parts of the query. .. warning:: Any data you pass to ``SQ`` objects is passed along **unescaped**. If you don't trust the data you're passing along, you should use the ``clean`` method on your ``SearchQuery`` to sanitize the data. Example:: from haystack.query import SQ # We want "title: Foo AND (tags:bar OR tags:moof)" sqs = SearchQuerySet().filter(title='Foo').filter(SQ(tags='bar') | SQ(tags='moof')) # To clean user-provided data: sqs = SearchQuerySet() clean_query = sqs.query.clean(user_query) sqs = sqs.filter(SQ(title=clean_query) | SQ(tags=clean_query)) Internally, the ``SearchQuery`` object maintains a tree of ``SQ`` objects. Each ``SQ`` object supports what field it looks up against, what kind of lookup (i.e. the ``__`` filters), what value it's looking for, if it's a AND/OR/NOT and tracks any children it may have. The ``SearchQuery.build_query`` method starts with the root of the tree, building part of the final query at each node until the full final query is ready for the ``SearchBackend``. Backend-Specific Methods ======================== When implementing a new backend, the following methods will need to be created: ``build_query_fragment`` ~~~~~~~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.build_query_fragment(self, field, filter_type, value) Generates a query fragment from a field, filter type and a value. Must be implemented in backends as this will be highly backend specific. Inheritable Methods =================== The following methods have a complete implementation in the base class and can largely be used unchanged. ``build_query`` ~~~~~~~~~~~~~~~ .. 
method:: SearchQuery.build_query(self) Interprets the collected query metadata and builds the final query to be sent to the backend. ``build_params`` ~~~~~~~~~~~~~~~~ .. method:: SearchQuery.build_params(self, spelling_query=None) Generates a list of params to use when searching. ``clean`` ~~~~~~~~~ .. method:: SearchQuery.clean(self, query_fragment) Provides a mechanism for sanitizing user input before presenting the value to the backend. A basic (override-able) implementation is provided. ``run`` ~~~~~~~ .. method:: SearchQuery.run(self, spelling_query=None, **kwargs) Builds and executes the query. Returns a list of search results. Optionally passes along an alternate query for spelling suggestions. Optionally passes along more kwargs for controlling the search query. ``run_mlt`` ~~~~~~~~~~~ .. method:: SearchQuery.run_mlt(self, **kwargs) Executes the More Like This. Returns a list of search results similar to the provided document (and optionally query). ``run_raw`` ~~~~~~~~~~~ .. method:: SearchQuery.run_raw(self, **kwargs) Executes a raw query. Returns a list of search results. ``get_count`` ~~~~~~~~~~~~~ .. method:: SearchQuery.get_count(self) Returns the number of results the backend found for the query. If the query has not been run, this will execute the query and store the results. ``get_results`` ~~~~~~~~~~~~~~~ .. method:: SearchQuery.get_results(self, **kwargs) Returns the results received from the backend. If the query has not been run, this will execute the query and store the results. ``get_facet_counts`` ~~~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.get_facet_counts(self) Returns the results received from the backend. If the query has not been run, this will execute the query and store the results. ``boost_fragment`` ~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.boost_fragment(self, boost_word, boost_value) Generates query fragment for boosting a single word/value pair. ``matching_all_fragment`` ~~~~~~~~~~~~~~~~~~~~~~~~~ .. 
method:: SearchQuery.matching_all_fragment(self) Generates the query that matches all documents. ``add_filter`` ~~~~~~~~~~~~~~ .. method:: SearchQuery.add_filter(self, expression, value, use_not=False, use_or=False) Narrows the search by requiring certain conditions. ``add_order_by`` ~~~~~~~~~~~~~~~~ .. method:: SearchQuery.add_order_by(self, field) Orders the search result by a field. ``clear_order_by`` ~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.clear_order_by(self) Clears out all ordering that has been already added, reverting the query to relevancy. ``add_model`` ~~~~~~~~~~~~~ .. method:: SearchQuery.add_model(self, model) Restricts the query requiring matches in the given model. This builds upon previous additions, so you can limit to multiple models by chaining this method several times. ``set_limits`` ~~~~~~~~~~~~~~ .. method:: SearchQuery.set_limits(self, low=None, high=None) Restricts the query by altering either the start, end or both offsets. ``clear_limits`` ~~~~~~~~~~~~~~~~ .. method:: SearchQuery.clear_limits(self) Clears any existing limits. ``add_boost`` ~~~~~~~~~~~~~ .. method:: SearchQuery.add_boost(self, term, boost_value) Adds a boosted term and the amount to boost it to the query. ``raw_search`` ~~~~~~~~~~~~~~ .. method:: SearchQuery.raw_search(self, query_string, **kwargs) Runs a raw query (no parsing) against the backend. This method causes the ``SearchQuery`` to ignore the standard query-generating facilities, running only what was provided instead. Note that any kwargs passed along will override anything provided to the rest of the ``SearchQuerySet``. ``more_like_this`` ~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.more_like_this(self, model_instance) Allows backends with support for "More Like This" to return results similar to the provided instance. ``add_stats_query`` ~~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.add_stats_query(self,stats_field,stats_facets) Adds stats and stats_facets queries for the Solr backend. 
``add_highlight`` ~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.add_highlight(self) Adds highlighting to the search results. ``add_within`` ~~~~~~~~~~~~~~ .. method:: SearchQuery.add_within(self, field, point_1, point_2): Adds bounding box parameters to search query. ``add_dwithin`` ~~~~~~~~~~~~~~~ .. method:: SearchQuery.add_dwithin(self, field, point, distance): Adds radius-based parameters to search query. ``add_distance`` ~~~~~~~~~~~~~~~~ .. method:: SearchQuery.add_distance(self, field, point): Denotes that results should include distance measurements from the point passed in. ``add_field_facet`` ~~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.add_field_facet(self, field, **options) Adds a regular facet on a field. ``add_date_facet`` ~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.add_date_facet(self, field, start_date, end_date, gap_by, gap_amount) Adds a date-based facet on a field. ``add_query_facet`` ~~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.add_query_facet(self, field, query) Adds a query facet on a field. ``add_narrow_query`` ~~~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.add_narrow_query(self, query) Narrows a search to a subset of all documents per the query. Generally used in conjunction with faceting. ``set_result_class`` ~~~~~~~~~~~~~~~~~~~~ .. method:: SearchQuery.set_result_class(self, klass) Sets the result class to use for results. Overrides any previous usages. If ``None`` is provided, Haystack will revert back to the default ``SearchResult`` object. ``using`` ~~~~~~~~~ .. method:: SearchQuery.using(self, using=None) Allows for overriding which connection should be used. This disables the use of routers when performing the query. If ``None`` is provided, it has no effect on what backend is used. django-haystack-2.8.0/docs/searchqueryset_api.rst000066400000000000000000000753041325051407000221600ustar00rootroot00000000000000.. _ref-searchqueryset-api: ====================== ``SearchQuerySet`` API ====================== .. 
class:: SearchQuerySet(using=None, query=None) The ``SearchQuerySet`` class is designed to make performing a search and iterating over its results easy and consistent. For those familiar with Django's ORM ``QuerySet``, much of the ``SearchQuerySet`` API should feel familiar. Why Follow ``QuerySet``? ======================== A couple reasons to follow (at least in part) the ``QuerySet`` API: #. Consistency with Django #. Most Django programmers have experience with the ORM and can use this knowledge with ``SearchQuerySet``. And from a high-level perspective, ``QuerySet`` and ``SearchQuerySet`` do very similar things: given certain criteria, provide a set of results. Both are powered by multiple backends, both are abstractions on top of the way a query is performed. Quick Start =========== For the impatient:: from haystack.query import SearchQuerySet all_results = SearchQuerySet().all() hello_results = SearchQuerySet().filter(content='hello') hello_world_results = SearchQuerySet().filter(content='hello world') unfriendly_results = SearchQuerySet().exclude(content='hello').filter(content='world') recent_results = SearchQuerySet().order_by('-pub_date')[:5] # Using the new input types... from haystack.inputs import AutoQuery, Exact, Clean sqs = SearchQuerySet().filter(content=AutoQuery(request.GET['q']), product_type=Exact('ancient book')) if request.GET['product_url']: sqs = sqs.filter(product_url=Clean(request.GET['product_url'])) For more on the ``AutoQuery``, ``Exact``, ``Clean`` classes & friends, see the :ref:`ref-inputtypes` documentation. ``SearchQuerySet`` ================== By default, ``SearchQuerySet`` provides the documented functionality. You can extend it with your own behavior by simply subclassing from ``SearchQuerySet`` and adding what you need, then using your subclass in place of ``SearchQuerySet``. Most methods in ``SearchQuerySet`` "chain" in a similar fashion to ``QuerySet``.
Additionally, like ``QuerySet``, ``SearchQuerySet`` is lazy (meaning it evaluates the query as late as possible). So the following is valid:: from haystack.query import SearchQuerySet results = SearchQuerySet().exclude(content='hello').filter(content='world').order_by('-pub_date').boost('title', 0.5)[10:20] The ``content`` Shortcut ======================== Searching your document fields is a very common activity. To help mitigate possible differences in ``SearchField`` names (and to help the backends deal with search queries that inspect the main corpus), there is a special field called ``content``. You may use this in any place that other fields names would work (e.g. ``filter``, ``exclude``, etc.) to indicate you simply want to search the main documents. For example:: from haystack.query import SearchQuerySet # This searches whatever fields were marked ``document=True``. results = SearchQuerySet().exclude(content='hello') This special pseudo-field works best with the ``exact`` lookup and may yield strange or unexpected results with the other lookups. ``SearchQuerySet`` Methods ========================== The primary interface to search in Haystack is through the ``SearchQuerySet`` object. It provides a clean, programmatic, portable API to the search backend. Many aspects are also "chainable", meaning you can call methods one after another, each applying their changes to the previous ``SearchQuerySet`` and further narrowing the search. All ``SearchQuerySet`` objects implement a list-like interface, meaning you can perform actions like getting the length of the results, accessing a result at an offset or even slicing the result list. Methods That Return A ``SearchQuerySet`` ---------------------------------------- ``all`` ~~~~~~~ .. method:: SearchQuerySet.all(self): Returns all results for the query. This is largely a no-op (returns an identical copy) but useful for denoting exactly what behavior is going on. ``none`` ~~~~~~~~ .. 
method:: SearchQuerySet.none(self): Returns an ``EmptySearchQuerySet`` that behaves like a ``SearchQuerySet`` but always yields no results. ``filter`` ~~~~~~~~~~ .. method:: SearchQuerySet.filter(self, **kwargs) Filters the search by looking for (and including) certain attributes. The lookup parameters (``**kwargs``) should follow the `Field lookups`_ below. If you specify more than one pair, they will be joined in the query according to the ``HAYSTACK_DEFAULT_OPERATOR`` setting (defaults to ``AND``). You can pass it either strings or a variety of :ref:`ref-inputtypes` if you need more advanced query behavior. .. warning:: Any data you pass to ``filter`` gets auto-escaped. If you need to send non-escaped data, use the ``Raw`` input type (:ref:`ref-inputtypes`). Also, if a string with one or more spaces in it is specified as the value, the string will get passed along **AS IS**. This will mean that it will **NOT** be treated as a phrase (like Haystack 1.X's behavior). If you want to match a phrase, you should use either the ``__exact`` filter type or the ``Exact`` input type (:ref:`ref-inputtypes`). Examples:: sqs = SearchQuerySet().filter(content='foo') sqs = SearchQuerySet().filter(content='foo', pub_date__lte=datetime.date(2008, 1, 1)) # Identical to the previous example. sqs = SearchQuerySet().filter(content='foo').filter(pub_date__lte=datetime.date(2008, 1, 1)) # To send unescaped data: from haystack.inputs import Raw sqs = SearchQuerySet().filter(title=Raw(trusted_query)) # To use auto-query behavior on a non-``document=True`` field. from haystack.inputs import AutoQuery sqs = SearchQuerySet().filter(title=AutoQuery(user_query)) ``exclude`` ~~~~~~~~~~~ .. method:: SearchQuerySet.exclude(self, **kwargs) Narrows the search by ensuring certain attributes are not included. .. warning:: Any data you pass to ``exclude`` gets auto-escaped. If you need to send non-escaped data, use the ``Raw`` input type (:ref:`ref-inputtypes`). 
Example:: sqs = SearchQuerySet().exclude(content='foo') ``filter_and`` ~~~~~~~~~~~~~~ .. method:: SearchQuerySet.filter_and(self, **kwargs) Narrows the search by looking for (and including) certain attributes. Join behavior in the query is forced to be ``AND``. Used primarily by the ``filter`` method. ``filter_or`` ~~~~~~~~~~~~~ .. method:: SearchQuerySet.filter_or(self, **kwargs) Narrows the search by looking for (and including) certain attributes. Join behavior in the query is forced to be ``OR``. Used primarily by the ``filter`` method. ``order_by`` ~~~~~~~~~~~~ .. method:: SearchQuerySet.order_by(self, *args) Alters the order in which the results should appear. Arguments should be strings that map to the attributes/fields within the index. You may specify multiple fields by comma separating them:: SearchQuerySet().filter(content='foo').order_by('author', 'pub_date') Default behavior is ascending order. To specify descending order, prepend the string with a ``-``:: SearchQuerySet().filter(content='foo').order_by('-pub_date') .. note:: In general, ordering is locale-specific. Haystack makes no effort to try to reconcile differences between characters from different languages. This means that accented characters will sort closely with the same character and **NOT** necessarily close to the unaccented form of the character. If you want this kind of behavior, you should override the ``prepare_FOO`` methods on your ``SearchIndex`` objects to transliterate the characters as you see fit. ``highlight`` ~~~~~~~~~~~~~ .. method:: SearchQuerySet.highlight(self) If supported by the backend, the ``SearchResult`` objects returned will include a highlighted version of the result:: sqs = SearchQuerySet().filter(content='foo').highlight() result = sqs[0] result.highlighted['text'][0] # u'Two computer scientists walk into a bar. The bartender says "Foo!".' The default functionality of the highlighter may not suit your needs. 
You can pass additional keyword arguments to ``highlight`` that will ultimately be used to build the query for your backend. Depending on the available arguments for your backend, you may need to pass in a dictionary instead of normal keyword arguments:: # Solr defines the fields to highlight by the ``hl.fl`` param. If not specified, we # would only get `text` back in the ``highlighted`` dict. kwargs = { 'hl.fl': 'other_field', 'hl.simple.pre': '', 'hl.simple.post': '' } sqs = SearchQuerySet().filter(content='foo').highlight(**kwargs) result = sqs[0] result.highlighted['other_field'][0] # u'Two computer scientists walk into a bar. The bartender says "Foo!".' ``models`` ~~~~~~~~~~ .. method:: SearchQuerySet.models(self, *models) Accepts an arbitrary number of Model classes to include in the search. This will narrow the search results to only include results from the models specified. Example:: SearchQuerySet().filter(content='foo').models(BlogEntry, Comment) ``result_class`` ~~~~~~~~~~~~~~~~ .. method:: SearchQuerySet.result_class(self, klass) Allows specifying a different class to use for results. Overrides any previous usages. If ``None`` is provided, Haystack will revert back to the default ``SearchResult`` object. Example:: SearchQuerySet().result_class(CustomResult) ``boost`` ~~~~~~~~~ .. method:: SearchQuerySet.boost(self, term, boost_value) Boosts a certain term of the query. You provide the term to be boosted and the value is the amount to boost it by. Boost amounts may be either an integer or a float. Example:: SearchQuerySet().filter(content='foo').boost('bar', 1.5) ``facet`` ~~~~~~~~~ .. method:: SearchQuerySet.facet(self, field, **options) Adds faceting to a query for the provided field. You provide the field (from one of the ``SearchIndex`` classes) you'd like to facet on. Any keyword options you provide will be passed along to the backend for that facet.
Example:: # For SOLR (setting f.author.facet.*; see http://wiki.apache.org/solr/SimpleFacetParameters#Parameters) SearchQuerySet().facet('author', mincount=1, limit=10) # For Elasticsearch (see http://www.elasticsearch.org/guide/reference/api/search/facets/terms-facet.html) SearchQuerySet().facet('author', size=10, order='term') In the search results you get back, facet counts will be populated in the ``SearchResult`` object. You can access them via the ``facet_counts`` method. Example:: # Count document hits for each author within the index. SearchQuerySet().filter(content='foo').facet('author') ``date_facet`` ~~~~~~~~~~~~~~ .. method:: SearchQuerySet.date_facet(self, field, start_date, end_date, gap_by, gap_amount=1) Adds faceting to a query for the provided field by date. You provide the field (from one of the ``SearchIndex`` classes) you like to facet on, a ``start_date`` (either ``datetime.datetime`` or ``datetime.date``), an ``end_date`` and the amount of time between gaps as ``gap_by`` (one of ``'year'``, ``'month'``, ``'day'``, ``'hour'``, ``'minute'`` or ``'second'``). You can also optionally provide a ``gap_amount`` to specify a different increment than ``1``. For example, specifying gaps by week (every seven days) would be ``gap_by='day', gap_amount=7``). In the search results you get back, facet counts will be populated in the ``SearchResult`` object. You can access them via the ``facet_counts`` method. Example:: # Count document hits for each day between 2009-06-07 to 2009-07-07 within the index. SearchQuerySet().filter(content='foo').date_facet('pub_date', start_date=datetime.date(2009, 6, 7), end_date=datetime.date(2009, 7, 7), gap_by='day') ``query_facet`` ~~~~~~~~~~~~~~~ .. method:: SearchQuerySet.query_facet(self, field, query) Adds faceting to a query for the provided field with a custom query. You provide the field (from one of the ``SearchIndex`` classes) you like to facet on and the backend-specific query (as a string) you'd like to execute. 
Please note that this is **NOT** portable between backends. The syntax is entirely dependent on the backend. No validation/cleansing is performed and it is up to the developer to ensure the query's syntax is correct. In the search results you get back, facet counts will be populated in the ``SearchResult`` object. You can access them via the ``facet_counts`` method. Example:: # Count document hits for authors that start with 'jo' within the index. SearchQuerySet().filter(content='foo').query_facet('author', 'jo*') ``within`` ~~~~~~~~~~ .. method:: SearchQuerySet.within(self, field, point_1, point_2): Spatial: Adds a bounding box search to the query. See the :ref:`ref-spatial` docs for more information. ``dwithin`` ~~~~~~~~~~~ .. method:: SearchQuerySet.dwithin(self, field, point, distance): Spatial: Adds a distance-based search to the query. See the :ref:`ref-spatial` docs for more information. ``stats`` ~~~~~~~~~ .. method:: SearchQuerySet.stats(self, field): Adds stats to a query for the provided field. This is supported on Solr only. You provide the field (from one of the ``SearchIndex`` classes) you would like stats on. In the search results you get back, stats will be populated in the ``SearchResult`` object. You can access them via the ``stats_results`` method. Example:: # Get stats on the author field. SearchQuerySet().filter(content='foo').stats('author') ``stats_facet`` ~~~~~~~~~~~~~~~ .. method:: SearchQuerySet.stats_facet(self, field, facet_fields=None): Adds stats facet for the given field and facet_fields represents the faceted fields. This is supported on Solr only. Example:: # Get stats on the author field, and stats on the author field faceted by bookstore. SearchQuerySet().filter(content='foo').stats_facet('author','bookstore') ``distance`` ~~~~~~~~~~~~ .. method:: SearchQuerySet.distance(self, field, point): Spatial: Denotes results must have distance measurements from the provided point. See the :ref:`ref-spatial` docs for more information.
``narrow`` ~~~~~~~~~~ .. method:: SearchQuerySet.narrow(self, query) Pulls a subset of documents from the search engine to search within. This is for advanced usage, especially useful when faceting. Example:: # Search, from recipes containing 'blend', for recipes containing 'banana'. SearchQuerySet().narrow('blend').filter(content='banana') # Using a fielded search where the recipe's title contains 'smoothie', find all recipes published before 2009. SearchQuerySet().narrow('title:smoothie').filter(pub_date__lte=datetime.datetime(2009, 1, 1)) By using ``narrow``, you can create drill-down interfaces for faceting by applying ``narrow`` calls for each facet that gets selected. This method is different from ``SearchQuerySet.filter()`` in that it does not affect the query sent to the engine. It pre-limits the document set being searched. Generally speaking, if you're in doubt of whether to use ``filter`` or ``narrow``, use ``filter``. .. note:: This method is, generally speaking, not necessarily portable between backends. The syntax is entirely dependent on the backend, though most backends have a similar syntax for basic fielded queries. No validation/cleansing is performed and it is up to the developer to ensure the query's syntax is correct. ``raw_search`` ~~~~~~~~~~~~~~ .. method:: SearchQuerySet.raw_search(self, query_string, **kwargs) Passes a raw query directly to the backend. This is for advanced usage, where the desired query can not be expressed via ``SearchQuerySet``. This method is still supported, however it now uses the much more flexible ``Raw`` input type (:ref:`ref-inputtypes`). .. warning:: Different from Haystack 1.X, this method no longer causes immediate evaluation & now chains appropriately. Example:: # In the case of Solr... (this example could be expressed with SearchQuerySet) SearchQuerySet().raw_search('django_ct:blog.blogentry "However, it is"') # Equivalent. 
from haystack.inputs import Raw sqs = SearchQuerySet().filter(content=Raw('django_ct:blog.blogentry "However, it is"')) Please note that this is **NOT** portable between backends. The syntax is entirely dependent on the backend. No validation/cleansing is performed and it is up to the developer to ensure the query's syntax is correct. Further, the use of ``**kwargs`` are completely undocumented intentionally. If a third-party backend can implement special features beyond what's present, it should use those ``**kwargs`` for passing that information. Developers should be careful to make sure there are no conflicts with the backend's ``search`` method, as that is called directly. ``load_all`` ~~~~~~~~~~~~ .. method:: SearchQuerySet.load_all(self) Efficiently populates the objects in the search results. Without using this method, DB lookups are done on a per-object basis, resulting in many individual trips to the database. If ``load_all`` is used, the ``SearchQuerySet`` will group similar objects into a single query, resulting in only as many queries as there are different object types returned. Example:: SearchQuerySet().filter(content='foo').load_all() ``auto_query`` ~~~~~~~~~~~~~~ .. method:: SearchQuerySet.auto_query(self, query_string, fieldname=None) Performs a best guess constructing the search query. This method is intended for common use directly with a user's query. This method is still supported, however it now uses the much more flexible ``AutoQuery`` input type (:ref:`ref-inputtypes`). It handles exact matches (specified with single or double quotes), negation ( using a ``-`` immediately before the term) and joining remaining terms with the operator specified in ``HAYSTACK_DEFAULT_OPERATOR``. Example:: sqs = SearchQuerySet().auto_query('goldfish "old one eye" -tank') # Equivalent. from haystack.inputs import AutoQuery sqs = SearchQuerySet().filter(content=AutoQuery('goldfish "old one eye" -tank')) # Against a different field. 
sqs = SearchQuerySet().filter(title=AutoQuery('goldfish "old one eye" -tank')) ``autocomplete`` ~~~~~~~~~~~~~~~~ A shortcut method to perform an autocomplete search. Must be run against fields that are either ``NgramField`` or ``EdgeNgramField``. Example:: SearchQuerySet().autocomplete(title_autocomplete='gol') ``more_like_this`` ~~~~~~~~~~~~~~~~~~ .. method:: SearchQuerySet.more_like_this(self, model_instance) Finds similar results to the object passed in. You should pass in an instance of a model (for example, one fetched via a ``get`` in Django's ORM). This will execute a query on the backend that searches for similar results. The instance you pass in should be an indexed object. Previously called methods will have an effect on the provided results. It will evaluate its own backend-specific query and populate the ``SearchQuerySet`` in the same manner as other methods. Example:: entry = Entry.objects.get(slug='haystack-one-oh-released') mlt = SearchQuerySet().more_like_this(entry) mlt.count() # 5 mlt[0].object.title # "Haystack Beta 1 Released" # ...or... mlt = SearchQuerySet().filter(public=True).exclude(pub_date__lte=datetime.date(2009, 7, 21)).more_like_this(entry) mlt.count() # 2 mlt[0].object.title # "Haystack Beta 1 Released" ``using`` ~~~~~~~~~ .. method:: SearchQuerySet.using(self, connection_name) Allows switching which connection the ``SearchQuerySet`` uses to search in. Example:: # Let the routers decide which connection to use. sqs = SearchQuerySet().all() # Specify the 'default'. sqs = SearchQuerySet().all().using('default') Methods That Do Not Return A ``SearchQuerySet`` ----------------------------------------------- ``count`` ~~~~~~~~~ .. method:: SearchQuerySet.count(self) Returns the total number of matching results. This returns an integer count of the total number of results the search backend found that matched. This method causes the query to evaluate and run the search. 
Example:: SearchQuerySet().filter(content='foo').count() ``best_match`` ~~~~~~~~~~~~~~ .. method:: SearchQuerySet.best_match(self) Returns the best/top search result that matches the query. This method causes the query to evaluate and run the search. This method returns a ``SearchResult`` object that is the best match the search backend found:: foo = SearchQuerySet().filter(content='foo').best_match() foo.id # Something like 5. # Identical to: foo = SearchQuerySet().filter(content='foo')[0] ``latest`` ~~~~~~~~~~ .. method:: SearchQuerySet.latest(self, date_field) Returns the most recent search result that matches the query. This method causes the query to evaluate and run the search. This method returns a ``SearchResult`` object that is the most recent match the search backend found:: foo = SearchQuerySet().filter(content='foo').latest('pub_date') foo.id # Something like 3. # Identical to: foo = SearchQuerySet().filter(content='foo').order_by('-pub_date')[0] ``facet_counts`` ~~~~~~~~~~~~~~~~ .. method:: SearchQuerySet.facet_counts(self) Returns the facet counts found by the query. This will cause the query to execute and should generally be used when presenting the data (template-level). You receive back a dictionary with three keys: ``fields``, ``dates`` and ``queries``. Each contains the facet counts for whatever facets you specified within your ``SearchQuerySet``. .. note:: The resulting dictionary may change before 1.0 release. It's fairly backend-specific at the time of writing. Standardizing is waiting on implementing other backends that support faceting and ensuring that the results presented will meet their needs as well. Example:: # Count document hits for each author. sqs = SearchQuerySet().filter(content='foo').facet('author') sqs.facet_counts() # Gives the following response: # { # 'dates': {}, # 'fields': { # 'author': [ # ('john', 4), # ('daniel', 2), # ('sally', 1), # ('terry', 1), # ], # }, # 'queries': {} # } ``stats_results`` ~~~~~~~~~~~~~~~~~ .. 
method:: SearchQuerySet.stats_results(self): Returns the stats results found by the query. This will cause the query to execute and should generally be used when presenting the data (template-level). You receive back a dictionary with three keys: ``fields``, ``dates`` and ``queries``. Each contains the facet counts for whatever facets you specified within your ``SearchQuerySet``. .. note:: The resulting dictionary may change before 1.0 release. It's fairly backend-specific at the time of writing. Standardizing is waiting on implementing other backends that support faceting and ensuring that the results presented will meet their needs as well. Example:: # Count document hits for each author. sqs = SearchQuerySet().filter(content='foo').stats('price') sqs.stats_results() # Gives the following response # { # 'stats_fields':{ # 'author:{ # 'min': 0.0, # 'max': 2199.0, # 'sum': 5251.2699999999995, # 'count': 15, # 'missing': 11, # 'sumOfSquares': 6038619.160300001, # 'mean': 350.08466666666664, # 'stddev': 547.737557906113 # } # } # # } ``set_spelling_query`` ~~~~~~~~~~~~~~~~~~~~~~~ .. method:: SearchQuerySet.set_spelling_query(self, spelling_query) This method allows you to set the text which will be passed to the backend search engine for spelling suggestions. This is helpful when the actual query being sent to the backend has complex syntax which should not be seen by the spelling suggestion component. In this example, a Solr ``edismax`` query is being used to boost field and document weights and ``set_spelling_query`` is being used to send only the actual user-entered text to the spellchecker:: alt_q = AltParser('edismax', self.query, qf='title^4 text provider^0.5', bq='django_ct:core.item^6.0') sqs = sqs.filter(content=alt_q) sqs = sqs.set_spelling_query(self.query) ``spelling_suggestion`` ~~~~~~~~~~~~~~~~~~~~~~~ .. method:: SearchQuerySet.spelling_suggestion(self, preferred_query=None) Returns the spelling suggestion found by the query. 
To work, you must set ``INCLUDE_SPELLING`` within your connection's settings dictionary to ``True``, and you must rebuild your index afterwards. Otherwise, ``None`` will be returned. This method causes the query to evaluate and run the search if it hasn't already run. Search results will be populated as normal but with an additional spelling suggestion. Note that this does *NOT* run the revised query, only suggests improvements. If provided, the optional argument to this method lets you specify an alternate query for the spelling suggestion to be run on. This is useful for passing along a raw user-provided query, especially when there are many methods chained on the ``SearchQuerySet``. Example:: sqs = SearchQuerySet().auto_query('mor exmples') sqs.spelling_suggestion() # u'more examples' # ...or... suggestion = SearchQuerySet().spelling_suggestion('moar exmples') suggestion # u'more examples' ``values`` ~~~~~~~~~~ .. method:: SearchQuerySet.values(self, *fields) Returns a list of dictionaries, each containing the key/value pairs for the result, exactly like Django's ``ValuesQuerySet``. This method causes the query to evaluate and run the search if it hasn't already run. You must provide a list of one or more fields as arguments. These fields will be the ones included in the individual results. Example:: sqs = SearchQuerySet().auto_query('banana').values('title', 'description') ``values_list`` ~~~~~~~~~~~~~~~ .. method:: SearchQuerySet.values_list(self, *fields, **kwargs) Returns a list of field values as tuples, exactly like Django's ``ValuesListQuerySet``. This method causes the query to evaluate and run the search if it hasn't already run. You must provide a list of one or more fields as arguments. These fields will be the ones included in the individual results. You may optionally also provide a ``flat=True`` kwarg, which in the case of a single field being provided, will return a flat list of that field rather than a list of tuples. 
Example:: sqs = SearchQuerySet().auto_query('banana').values_list('title', 'description') # ...or just the titles as a flat list... sqs = SearchQuerySet().auto_query('banana').values_list('title', flat=True) .. _field-lookups: Field Lookups ------------- The following lookup types are supported: * content * contains * exact * gt * gte * lt * lte * in * startswith * endswith * range * fuzzy Except for ``fuzzy`` these options are similar in function to the way Django's lookup types work. The actual behavior of these lookups is backend-specific. .. warning:: The ``startswith`` filter is strongly affected by the other ways the engine parses data, especially in regards to stemming (see :doc:`glossary`). This can mean that if the query ends in a vowel or a plural form, it may get stemmed before being evaluated. This is both backend-specific and yet fairly consistent between engines, and may be the cause of sometimes unexpected results. .. warning:: The ``content`` filter became the new default filter as of Haystack v2.X (the default in Haystack v1.X was ``exact``). This changed because ``exact`` caused problems and was unintuitive for new people trying to use Haystack. ``content`` is a much more natural usage. If you had an app built on Haystack v1.X & are upgrading, you'll need to sanity-check & possibly change any code that was relying on the default. The solution is just to add ``__exact`` to any "bare" field in a ``.filter(...)`` clause. Example:: SearchQuerySet().filter(content='foo') # Identical to: SearchQuerySet().filter(content__content='foo') # Phrase matching. 
SearchQuerySet().filter(content__exact='hello world') # Other usages look like: SearchQuerySet().filter(pub_date__gte=datetime.date(2008, 1, 1), pub_date__lt=datetime.date(2009, 1, 1)) SearchQuerySet().filter(author__in=['daniel', 'john', 'jane']) SearchQuerySet().filter(view_count__range=[3, 5]) ``EmptySearchQuerySet`` ======================= Also included in Haystack is an ``EmptySearchQuerySet`` class. It behaves just like ``SearchQuerySet`` but will always return zero results. This is useful for places where you want no query to occur or results to be returned. ``RelatedSearchQuerySet`` ========================= Sometimes you need to filter results based on relations in the database that are not present in the search index or are difficult to express that way. To this end, ``RelatedSearchQuerySet`` allows you to post-process the search results by calling ``load_all_queryset``. .. warning:: ``RelatedSearchQuerySet`` can have negative performance implications. Because results are excluded based on the database after the search query has been run, you can't guarantee offsets within the cache. Therefore, the entire cache that appears before the offset you request must be filled in order to produce consistent results. On large result sets and at higher slices, this can take time. This is the old behavior of ``SearchQuerySet``, so performance is no worse than the early days of Haystack. It supports all other methods that the standard ``SearchQuerySet`` does, with the addition of the ``load_all_queryset`` method and paying attention to the ``load_all_queryset`` method of ``SearchIndex`` objects when populating the cache. ``load_all_queryset`` --------------------- .. method:: RelatedSearchQuerySet.load_all_queryset(self, model_class, queryset) Allows for specifying a custom ``QuerySet`` that changes how ``load_all`` will fetch records for the provided model. 
This is useful for post-processing the results from the query, enabling things like adding ``select_related`` or filtering certain data. Example:: sqs = RelatedSearchQuerySet().filter(content='foo').load_all() # For the Entry model, we want to include related models directly associated # with the Entry to save on DB queries. sqs = sqs.load_all_queryset(Entry, Entry.objects.all().select_related(depth=1)) This method chains indefinitely, so you can specify ``QuerySets`` for as many models as you wish, one per model. The ``SearchQuerySet`` appends on a call to ``in_bulk``, so be sure that the ``QuerySet`` you provide can accommodate this and that the ids passed to ``in_bulk`` will map to the model in question. If you need to do this frequently and have one ``QuerySet`` you'd like to apply everywhere, you can specify this at the ``SearchIndex`` level using the ``load_all_queryset`` method. See :doc:`searchindex_api` for usage. django-haystack-2.8.0/docs/searchresult_api.rst000066400000000000000000000035411325051407000216070ustar00rootroot00000000000000.. _ref-searchresult-api: ==================== ``SearchResult`` API ==================== .. class:: SearchResult(app_label, model_name, pk, score, **kwargs) The ``SearchResult`` class provides structure to the results that come back from the search index. These objects are what a ``SearchQuerySet`` will return when evaluated. Attribute Reference =================== The class exposes the following useful attributes/properties: * ``app_label`` - The application the model is attached to. * ``model_name`` - The model's name. * ``pk`` - The primary key of the model. * ``score`` - The score provided by the search engine. * ``object`` - The actual model instance (lazy loaded). * ``model`` - The model class. * ``verbose_name`` - A prettier version of the model's class name for display. * ``verbose_name_plural`` - A prettier version of the model's *plural* class name for display. 
* ``searchindex`` - Returns the ``SearchIndex`` class associated with this result. * ``distance`` - On geo-spatial queries, this returns a ``Distance`` object representing the distance the result was from the focused point. Method Reference ================ ``content_type`` ---------------- .. method:: SearchResult.content_type(self) Returns the content type for the result's model instance. ``get_additional_fields`` ------------------------- .. method:: SearchResult.get_additional_fields(self) Returns a dictionary of all of the fields from the raw result. Useful for serializing results. Only returns what was seen from the search engine, so it may have extra fields Haystack's indexes aren't aware of. ``get_stored_fields`` --------------------- .. method:: SearchResult.get_stored_fields(self) Returns a dictionary of all of the stored fields from the SearchIndex. Useful for serializing results. Only returns the fields Haystack's indexes are aware of as being 'stored'. django-haystack-2.8.0/docs/settings.rst000066400000000000000000000200721325051407000201100ustar00rootroot00000000000000.. _ref-settings: ================= Haystack Settings ================= As a way to extend/change the default behavior within Haystack, there are several settings you can alter within your ``settings.py``. This is a comprehensive list of the settings Haystack recognizes. ``HAYSTACK_DEFAULT_OPERATOR`` ============================= **Optional** This setting controls what the default behavior for chaining ``SearchQuerySet`` filters together is. Valid options are:: HAYSTACK_DEFAULT_OPERATOR = 'AND' HAYSTACK_DEFAULT_OPERATOR = 'OR' Defaults to ``AND``. ``HAYSTACK_CONNECTIONS`` ======================== **Required** This setting controls which backends should be available. 
It should be a dictionary of dictionaries resembling the following (complete) example:: HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': 'http://localhost:9001/solr/default', 'TIMEOUT': 60 * 5, 'INCLUDE_SPELLING': True, 'BATCH_SIZE': 100, 'EXCLUDED_INDEXES': ['thirdpartyapp.search_indexes.BarIndex'], }, 'autocomplete': { 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', 'PATH': '/home/search/whoosh_index', 'STORAGE': 'file', 'POST_LIMIT': 128 * 1024 * 1024, 'INCLUDE_SPELLING': True, 'BATCH_SIZE': 100, 'EXCLUDED_INDEXES': ['thirdpartyapp.search_indexes.BarIndex'], }, 'slave': { 'ENGINE': 'xapian_backend.XapianEngine', 'PATH': '/home/search/xapian_index', 'INCLUDE_SPELLING': True, 'BATCH_SIZE': 100, 'EXCLUDED_INDEXES': ['thirdpartyapp.search_indexes.BarIndex'], }, 'db': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', 'EXCLUDED_INDEXES': ['thirdpartyapp.search_indexes.BarIndex'], } } No default for this setting is provided. The main keys (``default`` & friends) are identifiers for your application. You can use them any place the API exposes ``using`` as a method or kwarg. There must always be at least a ``default`` key within this setting. The ``ENGINE`` option is required for all backends & should point to the ``BaseEngine`` subclass for the backend. Additionally, each backend may have additional options it requires: * Solr * ``URL`` - The URL to the Solr core. e.g. http://localhost:9001/solr/collection1 * ``ADMIN_URL`` - The URL to the administrative functions. e.g. http://localhost:9001/solr/admin/cores * Whoosh * ``PATH`` - The filesystem path to where the index data is located. * Xapian * ``PATH`` - The filesystem path to where the index data is located. The following options are optional: * ``INCLUDE_SPELLING`` - Include spelling suggestions. Default is ``False`` * ``BATCH_SIZE`` - How many records should be updated at once via the management commands. Default is ``1000``. 
* ``TIMEOUT`` - (Solr and ElasticSearch) How long to wait (in seconds) before the connection times out. Default is ``10``. * ``STORAGE`` - (Whoosh-only) Which storage engine to use. Accepts ``file`` or ``ram``. Default is ``file``. * ``POST_LIMIT`` - (Whoosh-only) How large the file sizes can be. Default is ``128 * 1024 * 1024``. * ``FLAGS`` - (Xapian-only) A list of flags to use when querying the index. * ``EXCLUDED_INDEXES`` - A list of strings (as Python import paths) to indexes you do **NOT** want included. Useful for omitting third-party things you don't want indexed or for when you want to replace an index. * ``KWARGS`` - (Solr and ElasticSearch) Any additional keyword arguments that should be passed on to the underlying client library. ``HAYSTACK_ROUTERS`` ==================== **Optional** This setting controls how routing is performed to allow different backends to handle updates/deletes/reads. An example:: HAYSTACK_ROUTERS = ['search_routers.MasterSlaveRouter', 'haystack.routers.DefaultRouter'] Defaults to ``['haystack.routers.DefaultRouter']``. ``HAYSTACK_SIGNAL_PROCESSOR`` ============================= **Optional** This setting controls what ``SignalProcessor`` class is used to handle Django's signals & keep the search index up-to-date. An example:: HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' Defaults to ``'haystack.signals.BaseSignalProcessor'``. ``HAYSTACK_DOCUMENT_FIELD`` =========================== **Optional** This setting controls what fieldname Haystack relies on as the default field for searching within. An example:: HAYSTACK_DOCUMENT_FIELD = 'wall_o_text' Defaults to ``text``. ``HAYSTACK_SEARCH_RESULTS_PER_PAGE`` ==================================== **Optional** This setting controls how many results are shown per page when using the included ``SearchView`` and its subclasses. An example:: HAYSTACK_SEARCH_RESULTS_PER_PAGE = 50 Defaults to ``20``. 
``HAYSTACK_CUSTOM_HIGHLIGHTER`` =============================== **Optional** This setting allows you to specify your own custom ``Highlighter`` implementation for use with the ``{% highlight %}`` template tag. It should be the full path to the class. An example:: HAYSTACK_CUSTOM_HIGHLIGHTER = 'myapp.utils.BorkHighlighter' No default is provided. Haystack automatically falls back to the default implementation. ``HAYSTACK_ITERATOR_LOAD_PER_QUERY`` ==================================== **Optional** This setting controls the number of results that are pulled at once when iterating through a ``SearchQuerySet``. If you generally consume large portions at a time, you can bump this up for better performance. .. note:: This is not used in the case of a slice on a ``SearchQuerySet``, which already overrides the number of results pulled at once. An example:: HAYSTACK_ITERATOR_LOAD_PER_QUERY = 100 The default is 10 results at a time. ``HAYSTACK_LIMIT_TO_REGISTERED_MODELS`` ======================================= **Optional** This setting allows you to control whether or not Haystack will limit the search results seen to just the models registered. It should be a boolean. If your search index is never used for anything other than the models registered with Haystack, you can turn this off and get a small to moderate performance boost. An example:: HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False Default is ``True``. ``HAYSTACK_ID_FIELD`` ===================== **Optional** This setting allows you to control what the unique field name used internally by Haystack is called. Rarely needed unless your field names collide with Haystack's defaults. An example:: HAYSTACK_ID_FIELD = 'my_id' Default is ``id``. ``HAYSTACK_DJANGO_CT_FIELD`` ============================ **Optional** This setting allows you to control what the content type field name used internally by Haystack is called. Rarely needed unless your field names collide with Haystack's defaults. 
An example:: HAYSTACK_DJANGO_CT_FIELD = 'my_django_ct' Default is ``django_ct``. ``HAYSTACK_DJANGO_ID_FIELD`` ============================ **Optional** This setting allows you to control what the primary key field name used internally by Haystack is called. Rarely needed unless your field names collide with Haystack's defaults. An example:: HAYSTACK_DJANGO_ID_FIELD = 'my_django_id' Default is ``django_id``. ``HAYSTACK_IDENTIFIER_METHOD`` ============================== **Optional** This setting allows you to provide a custom method for ``haystack.utils.get_identifier``. Useful when the default identifier pattern of .. isn't suited to your needs. An example:: HAYSTACK_IDENTIFIER_METHOD = 'my_app.module.get_identifier' Default is ``haystack.utils.default_get_identifier``. ``HAYSTACK_FUZZY_MIN_SIM`` ========================== **Optional** This setting allows you to change the required similarity when using ``fuzzy`` filter. Default is ``0.5`` ``HAYSTACK_FUZZY_MAX_EXPANSIONS`` ================================= **Optional** This setting allows you to change the number of terms fuzzy queries will expand to when using ``fuzzy`` filter. Default is ``50`` django-haystack-2.8.0/docs/signal_processors.rst000066400000000000000000000113261325051407000220110ustar00rootroot00000000000000.. _ref-signal_processors: ================= Signal Processors ================= Keeping data in sync between the (authoritative) database & the (non-authoritative) search index is one of the more difficult problems when using Haystack. Even frequently running the ``update_index`` management command still introduces lag between when the data is stored & when it's available for searching. A solution to this is to incorporate Django's signals (specifically ``models.db.signals.post_save`` & ``models.db.signals.post_delete``), which then trigger *individual* updates to the search index, keeping them in near-perfect sync. 
Older versions of Haystack (pre-v2.0) tied the ``SearchIndex`` directly to the signals, which caused occasional conflicts of interest with third-party applications. To solve this, starting with Haystack v2.0, the concept of a ``SignalProcessor`` has been introduced. In it's simplest form, the ``SignalProcessor`` listens to whatever signals are setup & can be configured to then trigger the updates without having to change any ``SearchIndex`` code. .. warning:: Incorporating Haystack's ``SignalProcessor`` into your setup **will** increase the overall load (CPU & perhaps I/O depending on configuration). You will need to capacity plan for this & ensure you can make the tradeoff of more real-time results for increased load. Default - ``BaseSignalProcessor`` ================================= The default setup is configured to use the ``haystack.signals.BaseSignalProcessor`` class, which includes all the underlying code necessary to handle individual updates/deletes, **BUT DOES NOT HOOK UP THE SIGNALS**. This means that, by default, **NO ACTION IS TAKEN BY HAYSTACK** when a model is saved or deleted. The ``BaseSignalProcessor.setup`` & ``BaseSignalProcessor.teardown`` methods are both empty to prevent anything from being setup at initialization time. This usage is configured very simply (again, by default) with the ``HAYSTACK_SIGNAL_PROCESSOR`` setting. An example of manually setting this would look like:: HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor' This class forms an excellent base if you'd like to override/extend for more advanced behavior. Which leads us to... Realtime - ``RealtimeSignalProcessor`` ====================================== The other included ``SignalProcessor`` is the ``haystack.signals.RealtimeSignalProcessor`` class. It is an extremely thin extension of the ``BaseSignalProcessor`` class, differing only in that in implements the ``setup/teardown`` methods, tying **ANY** Model ``save/delete`` to the signal processor. 
If the model has an associated ``SearchIndex``, the ``RealtimeSignalProcessor`` will then trigger an update/delete of that model instance within the search index proper. Configuration looks like:: HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' This causes **all** ``SearchIndex`` classes to work in a realtime fashion. .. note:: These updates happen in-process, which if a request-response cycle is involved, may cause the user with the browser to sit & wait for indexing to be completed. Since this wait can be undesirable, especially under load, you may wish to look into queued search options. See the :ref:`ref-other_apps` documentation for existing options. Custom ``SignalProcessors`` =========================== The ``BaseSignalProcessor`` & ``RealtimeSignalProcessor`` classes are fairly simple/straightforward to customize or extend. Rather than forking Haystack to implement your modifications, you should create your own subclass within your codebase (anywhere that's importable is usually fine, though you should avoid ``models.py`` files). For instance, if you only wanted ``User`` saves to be realtime, deferring all other updates to the management commands, you'd implement the following code:: from django.contrib.auth.models import User from django.db import models from haystack import signals class UserOnlySignalProcessor(signals.BaseSignalProcessor): def setup(self): # Listen only to the ``User`` model. models.signals.post_save.connect(self.handle_save, sender=User) models.signals.post_delete.connect(self.handle_delete, sender=User) def teardown(self): # Disconnect only for the ``User`` model. models.signals.post_save.disconnect(self.handle_save, sender=User) models.signals.post_delete.disconnect(self.handle_delete, sender=User) For other customizations (modifying how saves/deletes should work), you'll need to override/extend the ``handle_save/handle_delete`` methods. 
The source code is your best option for referring to how things currently work on your version of Haystack. django-haystack-2.8.0/docs/spatial.rst000066400000000000000000000340551325051407000177130ustar00rootroot00000000000000.. _ref-spatial: ============== Spatial Search ============== Spatial search (also called geospatial search) allows you to take data that has a geographic location & enhance the search results by limiting them to a physical area. Haystack, combined with the latest versions of a couple engines, can provide this type of search. In addition, Haystack tries to implement these features in a way that is as close to GeoDjango_ as possible. There are some differences, which we'll highlight throughout this guide. Additionally, while the support isn't as comprehensive as PostGIS (for example), it is still quite useful. .. _GeoDjango: https://docs.djangoproject.com/en/1.11/ref/contrib/gis/ Additional Requirements ======================= The spatial functionality has only one non-included, non-available-in-Django dependency: * ``geopy`` - ``pip install geopy`` If you do not ever need distance information, you may be able to skip installing ``geopy``. Support ======= You need the latest & greatest of either Solr or Elasticsearch. None of the other backends (specifially the engines) support this kind of search. For Solr_, you'll need at least **v3.5+**. In addition, if you have an existing install of Haystack & Solr, you'll need to upgrade the schema & reindex your data. If you're adding geospatial data, you would have to reindex anyhow. For Elasticsearch, you'll need at least v0.17.7, preferably v0.18.6 or better. If you're adding geospatial data, you'll have to reindex as well. .. 
_Solr: http://lucene.apache.org/solr/ ====================== ====== =============== ======== ======== ====== Lookup Type Solr Elasticsearch Whoosh Xapian Simple ====================== ====== =============== ======== ======== ====== `within` X X `dwithin` X X `distance` X X `order_by('distance')` X X `polygon` X ====================== ====== =============== ======== ======== ====== For more details, you can inspect http://wiki.apache.org/solr/SpatialSearch or http://www.elasticsearch.org/guide/reference/query-dsl/geo-bounding-box-filter.html. Geospatial Assumptions ====================== ``Points`` ---------- Haystack prefers to work with ``Point`` objects, which are located in ``django.contrib.gis.geos.Point`` but conviently importable out of ``haystack.utils.geo.Point``. ``Point`` objects use **LONGITUDE, LATITUDE** for their construction, regardless if you use the parameters to instantiate them or WKT_/``GEOSGeometry``. .. _WKT: http://en.wikipedia.org/wiki/Well-known_text Examples:: # Using positional arguments. from haystack.utils.geo import Point pnt = Point(-95.23592948913574, 38.97127105172941) # Using WKT. from django.contrib.gis.geos import GEOSGeometry pnt = GEOSGeometry('POINT(-95.23592948913574 38.97127105172941)') They are preferred over just providing ``latitude, longitude`` because they are more intelligent, have a spatial reference system attached & are more consistent with GeoDjango's use. ``Distance`` ------------ Haystack also uses the ``D`` (or ``Distance``) objects from GeoDjango, implemented in ``django.contrib.gis.measure.Distance`` but conveniently importable out of ``haystack.utils.geo.D`` (or ``haystack.utils.geo.Distance``). ``Distance`` objects accept a very flexible set of measurements during instantiaton and can convert amongst them freely. This is important, because the engines rely on measurements being in kilometers but you're free to use whatever units you want. Examples:: from haystack.utils.geo import D # Start at 5 miles. 
imperial_d = D(mi=5) # Convert to fathoms... fathom_d = imperial_d.fathom # Now to kilometers... km_d = imperial_d.km # And back to miles. mi = imperial_d.mi They are preferred over just providing a raw distance because they are more intelligent, have a well-defined unit system attached & are consistent with GeoDjango's use. ``WGS-84`` ---------- All engines assume WGS-84 (SRID 4326). At the time of writing, there does **not** appear to be a way to switch this. Haystack will transform all points into this coordinate system for you. Indexing ======== Indexing is relatively simple. Simply add a ``LocationField`` (or several) onto your ``SearchIndex`` class(es) & provide them a ``Point`` object. For example:: from haystack import indexes from shops.models import Shop class ShopIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) # ... the usual, then... location = indexes.LocationField(model_attr='coordinates') def get_model(self): return Shop If you must manually prepare the data, you have to do something slightly less convenient, returning a string-ified version of the coordinates in WGS-84 as ``lat,long``:: from haystack import indexes from shops.models import Shop class ShopIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) # ... the usual, then... location = indexes.LocationField() def get_model(self): return Shop def prepare_location(self, obj): # If you're just storing the floats... return "%s,%s" % (obj.latitude, obj.longitude) Alternatively, you could build a method/property onto the ``Shop`` model that returns a ``Point`` based on those coordinates:: # shops/models.py from django.contrib.gis.geos import Point from django.db import models class Shop(models.Model): # ... the usual, then... latitude = models.FloatField() longitude = models.FloatField() # Usual methods, then... def get_location(self): # Remember, longitude FIRST! 
return Point(self.longitude, self.latitude) # shops/search_indexes.py from haystack import indexes from shops.models import Shop class ShopIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) location = indexes.LocationField(model_attr='get_location') def get_model(self): return Shop Querying ======== There are two types of geospatial queries you can run, ``within`` & ``dwithin``. Like their GeoDjango counterparts (within_ & dwithin_), these methods focus on finding results within an area. .. _within: https://docs.djangoproject.com/en/dev/ref/contrib/gis/geoquerysets/#within .. _dwithin: https://docs.djangoproject.com/en/dev/ref/contrib/gis/geoquerysets/#dwithin ``within`` ---------- .. method:: SearchQuerySet.within(self, field, point_1, point_2) ``within`` is a bounding box comparison. A bounding box is a rectangular area within which to search. It's composed of a bottom-left point & a top-right point. It is faster but slighty sloppier than its counterpart. Examples:: from haystack.query import SearchQuerySet from haystack.utils.geo import Point downtown_bottom_left = Point(-95.23947, 38.9637903) downtown_top_right = Point(-95.23362278938293, 38.973081081164715) # 'location' is the fieldname from our ``SearchIndex``... # Do the bounding box query. sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right) # Can be chained with other Haystack calls. sqs = SearchQuerySet().auto_query('coffee').within('location', downtown_bottom_left, downtown_top_right).order_by('-popularity') .. note:: In GeoDjango, assuming the ``Shop`` model had been properly geo-ified, this would have been implemented as:: from shops.models import Shop Shop.objects.filter(location__within=(downtown_bottom_left, downtown_top_right)) Haystack's form differs because it yielded a cleaner implementation, was no more typing than the GeoDjango version & tried to maintain the same terminology/similar signature. 
``dwithin`` ----------- .. method:: SearchQuerySet.dwithin(self, field, point, distance) ``dwithin`` is a radius-based search. A radius-based search is a circular area within which to search. It's composed of a center point & a radius (in kilometers, though Haystack will use the ``D`` object's conversion utilities to get it there). It is slower than``within`` but very exact & can involve fewer calculations on your part. Examples:: from haystack.query import SearchQuerySet from haystack.utils.geo import Point, D ninth_and_mass = Point(-95.23592948913574, 38.96753407043678) # Within a two miles. max_dist = D(mi=2) # 'location' is the fieldname from our ``SearchIndex``... # Do the radius query. sqs = SearchQuerySet().dwithin('location', ninth_and_mass, max_dist) # Can be chained with other Haystack calls. sqs = SearchQuerySet().auto_query('coffee').dwithin('location', ninth_and_mass, max_dist).order_by('-popularity') .. note:: In GeoDjango, assuming the ``Shop`` model had been properly geo-ified, this would have been implemented as:: from shops.models import Shop Shop.objects.filter(location__dwithin=(ninth_and_mass, D(mi=2))) Haystack's form differs because it yielded a cleaner implementation, was no more typing than the GeoDjango version & tried to maintain the same terminology/similar signature. ``distance`` ------------ .. method:: SearchQuerySet.distance(self, field, point) By default, search results will come back without distance information attached to them. In the concept of a bounding box, it would be ambiguous what the distances would be calculated against. And it is more calculation that may not be necessary. So like GeoDjango, Haystack exposes a method to signify that you want to include these calculated distances on results. Examples:: from haystack.query import SearchQuerySet from haystack.utils.geo import Point, D ninth_and_mass = Point(-95.23592948913574, 38.96753407043678) # On a bounding box... 
downtown_bottom_left = Point(-95.23947, 38.9637903) downtown_top_right = Point(-95.23362278938293, 38.973081081164715) sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right).distance('location', ninth_and_mass) # ...Or on a radius query. sqs = SearchQuerySet().dwithin('location', ninth_and_mass, D(mi=2)).distance('location', ninth_and_mass) You can even apply a different field, for instance if you calculate results of key, well-cached hotspots in town but want distances from the user's current position:: from haystack.query import SearchQuerySet from haystack.utils.geo import Point, D ninth_and_mass = Point(-95.23592948913574, 38.96753407043678) user_loc = Point(-95.23455619812012, 38.97240128290697) sqs = SearchQuerySet().dwithin('location', ninth_and_mass, D(mi=2)).distance('location', user_loc) .. note:: The astute will notice this is Haystack's biggest departure from GeoDjango. In GeoDjango, this would have been implemented as:: from shops.models import Shop Shop.objects.filter(location__dwithin=(ninth_and_mass, D(mi=2))).distance(user_loc) Note that, by default, the GeoDjango form leaves *out* the field to be calculating against (though it's possible to override it & specify the field). Haystack's form differs because the same assumptions are difficult to make. GeoDjango deals with a single model at a time, where Haystack deals with a broad mix of models. Additionally, accessing ``Model`` information is a couple hops away, so Haystack favors the explicit (if slightly more typing) approach. Ordering ======== Because you're dealing with search, even with geospatial queries, results still come back in **RELEVANCE** order. If you want to offer the user ordering results by distance, there's a simple way to enable this ordering. Using the standard Haystack ``order_by`` method, if you specify ``distance`` or ``-distance`` **ONLY**, you'll get geographic ordering. 
Additionally, you must have a call to ``.distance()`` somewhere in the chain, otherwise there is no distance information on the results & nothing to sort by. Examples:: from haystack.query import SearchQuerySet from haystack.utils.geo import Point, D ninth_and_mass = Point(-95.23592948913574, 38.96753407043678) downtown_bottom_left = Point(-95.23947, 38.9637903) downtown_top_right = Point(-95.23362278938293, 38.973081081164715) # Non-geo ordering. sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right).order_by('title') sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right).distance('location', ninth_and_mass).order_by('-created') # Geo ordering, closest to farthest. sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right).distance('location', ninth_and_mass).order_by('distance') # Geo ordering, farthest to closest. sqs = SearchQuerySet().dwithin('location', ninth_and_mass, D(mi=2)).distance('location', ninth_and_mass).order_by('-distance') .. note:: This call is identical to the GeoDjango usage. .. warning:: You can not specify both a distance & lexicographic ordering. If you specify more than just ``distance`` or ``-distance``, Haystack assumes ``distance`` is a field in the index & tries to sort on it. Example:: # May blow up! sqs = SearchQuerySet().dwithin('location', ninth_and_mass, D(mi=2)).distance('location', ninth_and_mass).order_by('distance', 'title') This is a limitation in the engine's implementation. If you actually **have** a field called ``distance`` (& aren't using calculated distance information), Haystack will do the right thing in these circumstances. Caveats ======= In all cases, you may call the ``within/dwithin/distance`` methods as many times as you like. However, the **LAST** call is the information that will be used. No combination logic is available, as this is largely a backend limitation. 
Combining calls to both ``within`` & ``dwithin`` may yield unexpected or broken results. They don't overlap when performing queries, so it may be possible to construct queries that work. Your Mileage May Vary. django-haystack-2.8.0/docs/templatetags.rst000066400000000000000000000040671325051407000207500ustar00rootroot00000000000000.. _ref-templatetags: ============= Template Tags ============= Haystack comes with a couple common template tags to make using some of its special features available to templates. ``highlight`` ============= Takes a block of text and highlights words from a provided query within that block of text. Optionally accepts arguments to provide the HTML tag to wrap highlighted word in, a CSS class to use with the tag and a maximum length of the blurb in characters. The defaults are ``span`` for the HTML tag, ``highlighted`` for the CSS class and 200 characters for the excerpt. Syntax:: {% highlight with [css_class "class_name"] [html_tag "span"] [max_length 200] %} Example:: # Highlight summary with default behavior. {% highlight result.summary with query %} # Highlight summary but wrap highlighted words with a div and the # following CSS class. {% highlight result.summary with query html_tag "div" css_class "highlight_me_please" %} # Highlight summary but only show 40 words. {% highlight result.summary with query max_length 40 %} The highlighter used by this tag can be overridden as needed. See the :doc:`highlighting` documentation for more information. ``more_like_this`` ================== Fetches similar items from the search index to find content that is similar to the provided model's content. .. note:: This requires a backend that has More Like This built-in. Syntax:: {% more_like_this model_instance as varname [for app_label.model_name,app_label.model_name,...] [limit n] %} Example:: # Pull a full SearchQuerySet (lazy loaded) of similar content. 
{% more_like_this entry as related_content %} # Pull just the top 5 similar pieces of content. {% more_like_this entry as related_content limit 5 %} # Pull just the top 5 similar entries or comments. {% more_like_this entry as related_content for "blog.entry,comments.comment" limit 5 %} This tag behaves exactly like ``SearchQuerySet.more_like_this``, so all notes in that regard apply here as well. django-haystack-2.8.0/docs/toc.rst000066400000000000000000000013401325051407000170320ustar00rootroot00000000000000Table Of Contents ================= .. toctree:: :maxdepth: 2 index tutorial glossary views_and_forms templatetags management_commands architecture_overview backend_support installing_search_engines settings faq who_uses other_apps debugging migration_from_1_to_2 python3 contributing best_practices highlighting faceting autocomplete boost signal_processors multiple_index rich_content_extraction spatial searchqueryset_api searchindex_api inputtypes searchfield_api searchresult_api searchquery_api searchbackend_api running_tests creating_new_backends utils Indices and tables ================== * :ref:`search` django-haystack-2.8.0/docs/tutorial.rst000066400000000000000000000326401325051407000201170ustar00rootroot00000000000000.. _ref-tutorial: ============================= Getting Started with Haystack ============================= Search is a topic of ever increasing importance. Users increasing rely on search to separate signal from noise and find what they're looking for quickly. In addition, search can provide insight into what things are popular (many searches), what things are difficult to find on the site and ways you can improve the site. To this end, Haystack tries to make integrating custom search as easy as possible while being flexible/powerful enough to handle more advanced use cases. 
Haystack is a reusable app (that is, it relies only on its own code and focuses on providing just search) that plays nicely with both apps you control as well as third-party apps (such as ``django.contrib.*``) without having to modify the sources. Haystack also does pluggable backends (much like Django's database layer), so virtually all of the code you write ought to be portable between whichever search engine you choose. .. note:: If you hit a stumbling block, there is both a `mailing list`_ and `#haystack on irc.freenode.net`_ to get help. .. note:: You can participate in and/or track the development of Haystack by subscribing to the `development mailing list`_. .. _mailing list: http://groups.google.com/group/django-haystack .. _#haystack on irc.freenode.net: irc://irc.freenode.net/haystack .. _development mailing list: http://groups.google.com/group/django-haystack-dev This tutorial assumes that you have a basic familiarity with the various major parts of Django (models/forms/views/settings/URLconfs) and tailored to the typical use case. There are shortcuts available as well as hooks for much more advanced setups, but those will not be covered here. For example purposes, we'll be adding search functionality to a simple note-taking application. Here is ``myapp/models.py``:: from django.db import models from django.contrib.auth.models import User class Note(models.Model): user = models.ForeignKey(User) pub_date = models.DateTimeField() title = models.CharField(max_length=200) body = models.TextField() def __unicode__(self): return self.title Finally, before starting with Haystack, you will want to choose a search backend to get started. There is a quick-start guide to :doc:`installing_search_engines`, though you may want to defer to each engine's official instructions. Installation ============= Use your favorite Python package manager to install the app from PyPI, e.g. 
Example:: pip install django-haystack Configuration ============= Add Haystack To ``INSTALLED_APPS`` ---------------------------------- As with most Django applications, you should add Haystack to the ``INSTALLED_APPS`` within your settings file (usually ``settings.py``). Example:: INSTALLED_APPS = [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.sites', # Added. 'haystack', # Then your usual apps... 'blog', ] Modify Your ``settings.py`` --------------------------- Within your ``settings.py``, you'll need to add a setting to indicate where your site configuration file will live and which backend to use, as well as other settings for that backend. ``HAYSTACK_CONNECTIONS`` is a required setting and should be at least one of the following: Solr ~~~~ Example:: HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': 'http://127.0.0.1:8983/solr' # ...or for multicore... # 'URL': 'http://127.0.0.1:8983/solr/mysite', }, } Elasticsearch ~~~~~~~~~~~~~ Example (ElasticSearch 1.x):: HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine', 'URL': 'http://127.0.0.1:9200/', 'INDEX_NAME': 'haystack', }, } Example (ElasticSearch 2.x):: HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine', 'URL': 'http://127.0.0.1:9200/', 'INDEX_NAME': 'haystack', }, } Whoosh ~~~~~~ Requires setting ``PATH`` to the place on your filesystem where the Whoosh index should be located. Standard warnings about permissions and keeping it out of a place your webserver may serve documents out of apply. 
Example:: import os HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', 'PATH': os.path.join(os.path.dirname(__file__), 'whoosh_index'), }, } Xapian ~~~~~~ First, install the Xapian backend (via http://github.com/notanumber/xapian-haystack/tree/master) per the instructions included with the backend. Requires setting ``PATH`` to the place on your filesystem where the Xapian index should be located. Standard warnings about permissions and keeping it out of a place your webserver may serve documents out of apply. Example:: import os HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'xapian_backend.XapianEngine', 'PATH': os.path.join(os.path.dirname(__file__), 'xapian_index'), }, } Simple ~~~~~~ The ``simple`` backend using very basic matching via the database itself. It's not recommended for production use but it will return results. .. warning:: This backend does *NOT* work like the other backends do. Data preparation does nothing & advanced filtering calls do not work. You really probably don't want this unless you're in an environment where you just want to silence Haystack. Example:: HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', }, } Handling Data ============= Creating ``SearchIndexes`` -------------------------- ``SearchIndex`` objects are the way Haystack determines what data should be placed in the search index and handles the flow of data in. You can think of them as being similar to Django ``Models`` or ``Forms`` in that they are field-based and manipulate/store data. You generally create a unique ``SearchIndex`` for each type of ``Model`` you wish to index, though you can reuse the same ``SearchIndex`` between different models if you take care in doing so and your field names are very standardized. 
To build a ``SearchIndex``, all that's necessary is to subclass both ``indexes.SearchIndex`` & ``indexes.Indexable``, define the fields you want to store data with and define a ``get_model`` method. We'll create the following ``NoteIndex`` to correspond to our ``Note`` model. This code generally goes in a ``search_indexes.py`` file within the app it applies to, though that is not required. This allows Haystack to automatically pick it up. The ``NoteIndex`` should look like:: import datetime from haystack import indexes from myapp.models import Note class NoteIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return Note def index_queryset(self, using=None): """Used when the entire index for model is updated.""" return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now()) Every ``SearchIndex`` requires there be one (and only one) field with ``document=True``. This indicates to both Haystack and the search engine about which field is the primary field for searching within. .. warning:: When you choose a ``document=True`` field, it should be consistently named across all of your ``SearchIndex`` classes to avoid confusing the backend. The convention is to name this field ``text``. There is nothing special about the ``text`` field name used in all of the examples. It could be anything; you could call it ``pink_polka_dot`` and it won't matter. It's simply a convention to call it ``text``. Additionally, we're providing ``use_template=True`` on the ``text`` field. This allows us to use a data template (rather than error-prone concatenation) to build the document the search engine will index. 
You’ll need to create a new template inside your template directory called ``search/indexes/myapp/note_text.txt`` and place the following inside:: {{ object.title }} {{ object.user.get_full_name }} {{ object.body }} In addition, we added several other fields (``author`` and ``pub_date``). These are useful when you want to provide additional filtering options. Haystack comes with a variety of ``SearchField`` classes to handle most types of data. A common theme is to allow admin users to add future content but have it not display on the site until that future date is reached. We specify a custom ``index_queryset`` method to prevent those future items from being indexed. .. _Django admin site: http://docs.djangoproject.com/en/dev/ref/contrib/admin/ Setting Up The Views ==================== Add The ``SearchView`` To Your URLconf -------------------------------------- Within your URLconf, add the following line:: url(r'^search/', include('haystack.urls')), This will pull in the default URLconf for Haystack. It consists of a single URLconf that points to a ``SearchView`` instance. You can change this class's behavior by passing it any of several keyword arguments or override it entirely with your own view. Search Template --------------- Your search template (``search/search.html`` for the default case) will likely be very simple. The following is enough to get going (your template/block names will likely differ):: {% extends 'base.html' %} {% block content %}

    <h2>Search</h2>

    <form method="get" action=".">
        <table>
            {{ form.as_table }}
            <tr>
                <td>&nbsp;</td>
                <td>
                    <input type="submit" value="Search">
                </td>
            </tr>
        </table>

        {% if query %}
            <h3>Results</h3>

            {% for result in page.object_list %}
                <p>
                    <a href="{{ result.object.get_absolute_url }}">{{ result.object.title }}</a>
                </p>
            {% empty %}
                <p>No results found.</p>
            {% endfor %}

            {% if page.has_previous or page.has_next %}
                <div class="pagination">
                    {% if page.has_previous %}<a href="?q={{ query }}&amp;page={{ page.previous_page_number }}">{% endif %}&laquo; Previous{% if page.has_previous %}</a>{% endif %}
                    |
                    {% if page.has_next %}<a href="?q={{ query }}&amp;page={{ page.next_page_number }}">{% endif %}Next &raquo;{% if page.has_next %}</a>{% endif %}
                </div>
            {% endif %}
        {% else %}
            {# Show some example queries to run, maybe query syntax, something else? #}
        {% endif %}
    </form>
{% endblock %} Note that the ``page.object_list`` is actually a list of ``SearchResult`` objects instead of individual models. These objects have all the data returned from that record within the search index as well as score. They can also directly access the model for the result via ``{{ result.object }}``. So the ``{{ result.object.title }}`` uses the actual ``Note`` object in the database and accesses its ``title`` field. Reindex ------- The final step, now that you have everything setup, is to put your data in from your database into the search index. Haystack ships with a management command to make this process easy. .. note:: If you're using the Solr backend, you have an extra step. Solr's configuration is XML-based, so you'll need to manually regenerate the schema. You should run ``./manage.py build_solr_schema`` first, drop the XML output in your Solr's ``schema.xml`` file and restart your Solr server. Simply run ``./manage.py rebuild_index``. You'll get some totals of how many models were processed and placed in the index. .. note:: Using the standard ``SearchIndex``, your search index content is only updated whenever you run either ``./manage.py update_index`` or start afresh with ``./manage.py rebuild_index``. You should cron up a ``./manage.py update_index`` job at whatever interval works best for your site (using ``--age=`` reduces the number of things to update). Alternatively, if you have low traffic and/or your search engine can handle it, the ``RealtimeSignalProcessor`` automatically handles updates/deletes for you. Complete! ========= You can now visit the search section of your site, enter a search query and receive search results back for the query! Congratulations! What's Next? ============ This tutorial just scratches the surface of what Haystack provides. The ``SearchQuerySet`` is the underpinning of all search in Haystack and provides a powerful, ``QuerySet``-like API (see :ref:`ref-searchqueryset-api`). 
You can use much more complicated ``SearchForms``/``SearchViews`` to give users a better UI (see :ref:`ref-views-and_forms`). And the :ref:`ref-best-practices` provides insight into non-obvious or advanced usages of Haystack. django-haystack-2.8.0/docs/utils.rst000066400000000000000000000005271325051407000174130ustar00rootroot00000000000000.. _ref-utils: ========= Utilities ========= Included here are some of the general use bits included with Haystack. ``get_identifier`` ------------------ .. function:: get_identifier(obj_or_string) Gets an unique identifier for the object or a string representing the object. If not overridden, uses ``..``. django-haystack-2.8.0/docs/views_and_forms.rst000066400000000000000000000363221325051407000214420ustar00rootroot00000000000000.. _ref-views-and_forms: ============= Views & Forms ============= .. note:: As of version 2.4 the views in ``haystack.views.SearchView`` are deprecated in favor of the new generic views in ``haystack.generic_views.SearchView`` which use the standard Django `class-based views`_ which are available in every version of Django which is supported by Haystack. .. _class-based views: https://docs.djangoproject.com/en/1.7/topics/class-based-views/ Haystack comes with some default, simple views & forms as well as some django-style views to help you get started and to cover the common cases. Included is a way to provide: * Basic, query-only search. * Search by models. * Search with basic highlighted results. * Faceted search. * Search by models with basic highlighted results. Most processing is done by the forms provided by Haystack via the ``search`` method. As a result, all but the faceted types (see :doc:`faceting`) use the standard ``SearchView``. There is very little coupling between the forms & the views (other than relying on the existence of a ``search`` method on the form), so you may interchangeably use forms and/or views anywhere within your own code. Forms ===== .. 
currentmodule:: haystack.forms ``SearchForm`` -------------- The most basic of the form types, this form consists of a single field, the ``q`` field (for query). Upon searching, the form will take the cleaned contents of the ``q`` field and perform an ``auto_query`` on either the custom ``SearchQuerySet`` you provide or off a default ``SearchQuerySet``. To customize the ``SearchQuerySet`` the form will use, pass it a ``searchqueryset`` parameter to the constructor with the ``SearchQuerySet`` you'd like to use. If using this form in conjunction with a ``SearchView``, the form will receive whatever ``SearchQuerySet`` you provide to the view with no additional work needed. The ``SearchForm`` also accepts a ``load_all`` parameter (``True`` or ``False``), which determines how the database is queried when iterating through the results. This also is received automatically from the ``SearchView``. All other forms in Haystack inherit (either directly or indirectly) from this form. ``HighlightedSearchForm`` ------------------------- Identical to the ``SearchForm`` except that it tags the ``highlight`` method on to the end of the ``SearchQuerySet`` to enable highlighted results. ``ModelSearchForm`` ------------------- This form adds new fields to form. It iterates through all registered models for the current ``SearchSite`` and provides a checkbox for each one. If no models are selected, all types will show up in the results. ``HighlightedModelSearchForm`` ------------------------------ Identical to the ``ModelSearchForm`` except that it tags the ``highlight`` method on to the end of the ``SearchQuerySet`` to enable highlighted results on the selected models. ``FacetedSearchForm`` --------------------- Identical to the ``SearchForm`` except that it adds a hidden ``selected_facets`` field onto the form, allowing the form to narrow the results based on the facets chosen by the user. 
Creating Your Own Form ---------------------- The simplest way to go about creating your own form is to inherit from ``SearchForm`` (or the desired parent) and extend the ``search`` method. By doing this, you save yourself most of the work of handling data correctly and stay API compatible with the ``SearchView``. For example, let's say you're providing search with a user-selectable date range associated with it. You might create a form that looked as follows:: from django import forms from haystack.forms import SearchForm class DateRangeSearchForm(SearchForm): start_date = forms.DateField(required=False) end_date = forms.DateField(required=False) def search(self): # First, store the SearchQuerySet received from other processing. sqs = super(DateRangeSearchForm, self).search() if not self.is_valid(): return self.no_query_found() # Check to see if a start_date was chosen. if self.cleaned_data['start_date']: sqs = sqs.filter(pub_date__gte=self.cleaned_data['start_date']) # Check to see if an end_date was chosen. if self.cleaned_data['end_date']: sqs = sqs.filter(pub_date__lte=self.cleaned_data['end_date']) return sqs This form adds two new fields for (optionally) choosing the start and end dates. Within the ``search`` method, we grab the results from the parent form's processing. Then, if a user has selected a start and/or end date, we apply that filtering. Finally, we simply return the ``SearchQuerySet``. Views ===== .. currentmodule:: haystack.views .. note:: As of version 2.4 the views in ``haystack.views.SearchView`` are deprecated in favor of the new generic views in ``haystack.generic_views.SearchView`` which use the standard Django `class-based views`_ which are available in every version of Django which is supported by Haystack. .. _class-based views: https://docs.djangoproject.com/en/1.7/topics/class-based-views/ New Django Class Based Views ---------------------------- .. 
versionadded:: 2.4.0 The views in ``haystack.generic_views.SearchView`` inherit from Django’s standard `FormView `_. The example views can be customized like any other Django class-based view as demonstrated in this example which filters the search results in ``get_queryset``:: # views.py from datetime import date from haystack.generic_views import SearchView class MySearchView(SearchView): """My custom search view.""" def get_queryset(self): queryset = super(MySearchView, self).get_queryset() # further filter queryset based on some set of criteria return queryset.filter(pub_date__gte=date(2015, 1, 1)) def get_context_data(self, *args, **kwargs): context = super(MySearchView, self).get_context_data(*args, **kwargs) # do something return context # urls.py urlpatterns = [ url(r'^/search/?$', MySearchView.as_view(), name='search_view'), ] Upgrading ~~~~~~~~~ Upgrading from basic usage of the old-style views to new-style views is usually as simple as: #. Create new views under ``views.py`` subclassing ``haystack.generic_views.SearchView`` or ``haystack.generic_views.FacetedSearchView`` #. Move all parameters of your old-style views from your ``urls.py`` to attributes on your new views. This will require renaming ``searchqueryset`` to ``queryset`` and ``template`` to ``template_name`` #. Review your templates and replace the ``page`` variable with ``page_obj`` Here's an example:: ### old-style views... # urls.py sqs = SearchQuerySet().filter(author='john') urlpatterns = [ url(r'^$', SearchView( template='my/special/path/john_search.html', searchqueryset=sqs, form_class=SearchForm ), name='haystack_search'), ] ### new-style views... 
# views.py class JohnSearchView(SearchView): template_name = 'my/special/path/john_search.html' queryset = SearchQuerySet().filter(author='john') form_class = SearchForm # urls.py from myapp.views import JohnSearchView urlpatterns = [ url(r'^$', JohnSearchView.as_view(), name='haystack_search'), ] If your views overrode methods on the old-style SearchView, you will need to refactor those methods to the equivalents on Django's generic views. For example, if you previously used ``extra_context()`` to add additional template variables or preprocess the values returned by Haystack, that code would move to ``get_context_data`` +-----------------------+-------------------------------------------+ | Old Method | New Method | +=======================+===========================================+ | ``extra_context()`` | `get_context_data()`_ | +-----------------------+-------------------------------------------+ | ``create_response()`` | `dispatch()`_ or ``get()`` and ``post()`` | +-----------------------+-------------------------------------------+ | ``get_query()`` | `get_queryset()`_ | +-----------------------+-------------------------------------------+ .. _get_context_data(): https://docs.djangoproject.com/en/1.7/ref/class-based-views/mixins-simple/#django.views.generic.base.ContextMixin.get_context_data .. _dispatch(): https://docs.djangoproject.com/en/1.7/ref/class-based-views/base/#django.views.generic.base.View.dispatch .. _get_queryset(): https://docs.djangoproject.com/en/1.7/ref/class-based-views/mixins-multiple-object/#django.views.generic.list.MultipleObjectMixin.get_queryset Old-Style Views --------------- .. deprecated:: 2.4.0 Haystack comes bundled with three views, the class-based views (``SearchView`` & ``FacetedSearchView``) and a traditional functional view (``basic_search``). The class-based views provide for easy extension should you need to alter the way a view works. 
Except in the case of faceting (again, see :doc:`faceting`), the ``SearchView`` works interchangeably with all other forms provided by Haystack. The functional view provides an example of how Haystack can be used in more traditional settings or as an example of how to write a more complex custom view. It is also thread-safe. ``SearchView(template=None, load_all=True, form_class=None, searchqueryset=None, results_per_page=None)`` --------------------------------------------------------------------------------------------------------------------------------------- The ``SearchView`` is designed to be easy/flexible enough to override common changes as well as being internally abstracted so that only altering a specific portion of the code should be easy to do. Without touching any of the internals of the ``SearchView``, you can modify which template is used, which form class should be instantiated to search with, what ``SearchQuerySet`` to use in the event you wish to pre-filter the results. what ``Context``-style object to use in the response and the ``load_all`` performance optimization to reduce hits on the database. These options can (and generally should) be overridden at the URLconf level. For example, to have a custom search limited to the 'John' author, displaying all models to search by and specifying a custom template (``my/special/path/john_search.html``), your URLconf should look something like:: from django.conf.urls import url from haystack.forms import ModelSearchForm from haystack.query import SearchQuerySet from haystack.views import SearchView sqs = SearchQuerySet().filter(author='john') # Without threading... urlpatterns = [ url(r'^$', SearchView( template='my/special/path/john_search.html', searchqueryset=sqs, form_class=SearchForm ), name='haystack_search'), ] # With threading... 
from haystack.views import SearchView, search_view_factory urlpatterns = [ url(r'^$', search_view_factory( view_class=SearchView, template='my/special/path/john_search.html', searchqueryset=sqs, form_class=ModelSearchForm ), name='haystack_search'), ] .. warning:: The standard ``SearchView`` is not thread-safe. Use the ``search_view_factory`` function, which returns thread-safe instances of ``SearchView``. By default, if you don't specify a ``form_class``, the view will use the ``haystack.forms.ModelSearchForm`` form. Beyond this customizations, you can create your own ``SearchView`` and extend/override the following methods to change the functionality. ``__call__(self, request)`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Generates the actual response to the search. Relies on internal, overridable methods to construct the response. You generally should avoid altering this method unless you need to change the flow of the methods or to add a new method into the processing. ``build_form(self, form_kwargs=None)`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Instantiates the form the class should use to process the search query. Optionally accepts a dictionary of parameters that are passed on to the form's ``__init__``. You can use this to lightly customize the form. You should override this if you write a custom form that needs special parameters for instantiation. ``get_query(self)`` ~~~~~~~~~~~~~~~~~~~ Returns the query provided by the user. Returns an empty string if the query is invalid. This pulls the cleaned query from the form, via the ``q`` field, for use elsewhere within the ``SearchView``. This is used to populate the ``query`` context variable. ``get_results(self)`` ~~~~~~~~~~~~~~~~~~~~~ Fetches the results via the form. Returns an empty list if there's no query to search with. This method relies on the form to do the heavy lifting as much as possible. ``build_page(self)`` ~~~~~~~~~~~~~~~~~~~~ Paginates the results appropriately. 
In case someone does not want to use Django's built-in pagination, it should be a simple matter to override this method to do what they would like. ``extra_context(self)`` ~~~~~~~~~~~~~~~~~~~~~~~ Allows the addition of more context variables as needed. Must return a dictionary whose contents will add to or overwrite the other variables in the context. ``create_response(self)`` ~~~~~~~~~~~~~~~~~~~~~~~~~ Generates the actual HttpResponse to send back to the user. It builds the page, creates the context and renders the response for all the aforementioned processing. ``basic_search(request, template='search/search.html', load_all=True, form_class=ModelSearchForm, searchqueryset=None, extra_context=None, results_per_page=None)`` ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- The ``basic_search`` tries to provide most of the same functionality as the class-based views but resembles a more traditional generic view. It's both a working view if you prefer not to use the class-based views as well as a good starting point for writing highly custom views. Since it is all one function, the only means of extension are passing in kwargs, similar to the way generic views work. Creating Your Own View ---------------------- As with the forms, inheritance is likely your best bet. In this case, the ``FacetedSearchView`` is a perfect example of how to extend the existing ``SearchView``. 
The complete code for the ``FacetedSearchView`` looks like:: class FacetedSearchView(SearchView): def extra_context(self): extra = super(FacetedSearchView, self).extra_context() if self.results == []: extra['facets'] = self.form.search().facet_counts() else: extra['facets'] = self.results.facet_counts() return extra It updates the name of the class (generally for documentation purposes) and adds the facets from the ``SearchQuerySet`` to the context as the ``facets`` variable. As with the custom form example above, it relies on the parent class to handle most of the processing and extends that only where needed. django-haystack-2.8.0/docs/who_uses.rst000066400000000000000000000120731325051407000201060ustar00rootroot00000000000000.. _ref-who-uses: Sites Using Haystack ==================== The following sites are a partial list of people using Haystack. I'm always interested in adding more sites, so please find me (``daniellindsley``) via IRC or the mailing list thread. LJWorld/Lawrence.com/KUSports ----------------------------- For all things search-related. Using: Solr * http://www2.ljworld.com/search/ * http://www2.ljworld.com/search/vertical/news.story/ * http://www2.ljworld.com/marketplace/ * http://www.lawrence.com/search/ * http://www.kusports.com/search/ AltWeeklies ----------- Providing an API to story aggregation. Using: Whoosh * http://www.northcoastjournal.com/altweeklies/documentation/ Teachoo ----------- Teachoo uses Haystack for its site search. Using: Elasticsearch * https://www.teachoo.com/ Trapeze ------- Various projects. Using: Xapian * http://www.trapeze.com/ * http://www.windmobile.ca/ * http://www.bonefishgrill.com/ * http://www.canadiantire.ca/ (Portions of) Vickerey.com ------------ For (really well done) search & faceting. Using: Solr * http://store.vickerey.com/products/search/ Eldarion -------- Various projects. Using: Solr * http://eldarion.com/ Sunlight Labs ------------- For general search. 
Using: Whoosh & Solr * http://sunlightlabs.com/ * http://subsidyscope.com/ NASA ---- For general search. Using: Solr * An internal site called SMD Spacebook 1.1. * http://science.nasa.gov/ AllForLocal ----------- For general search. * http://www.allforlocal.com/ HUGE ---- Various projects. Using: Solr * http://hugeinc.com/ * http://houselogic.com/ Brick Design ------------ For search on Explore. Using: Solr * http://bricksf.com/ * http://explore.org/ Winding Road ------------ For general search. Using: Solr * http://www.windingroad.com/ Reddit ------ For Reddit Gifts. Using: Whoosh * http://redditgifts.com/ Pegasus News ------------ For general search. Using: Xapian * http://www.pegasusnews.com/ Rampframe --------- For general search. Using: Xapian * http://www.rampframe.com/ Forkinit -------- For general search, model-specific search and suggestions via MLT. Using: Solr * http://forkinit.com/ Structured Abstraction ---------------------- For general search. Using: Xapian * http://www.structuredabstraction.com/ * http://www.delivergood.org/ CustomMade ---------- For general search. Using: Solr * http://www.custommade.com/ University of the Andes, Dept. of Political Science --------------------------------------------------- For general search & section-specific search. Developed by Monoku. Using: Solr * http://www.congresovisible.org/ * http://www.monoku.com/ Christchurch Art Gallery ------------------------ For general search & section-specific search. Using: Solr * http://christchurchartgallery.org.nz/search/ * http://christchurchartgallery.org.nz/collection/browse/ DevCheatSheet.com ----------------- For general search. Using: Xapian * http://devcheatsheet.com/ TodasLasRecetas --------------- For search, faceting & More Like This. Using: Solr * http://www.todaslasrecetas.es/receta/s/?q=langostinos * http://www.todaslasrecetas.es/receta/9526/brochetas-de-langostinos AstroBin -------- For general search. 
Using: Solr * http://www.astrobin.com/ European Paper Company ---------------------- For general search. Using: ??? * http://europeanpaper.com/ mtn-op ------ For general search. Using: ??? * http://mountain-op.com/ Crate ----- Crate is a PyPI mirror/replacement. It's using Haystack to power all search & faceted navigation on the site. Using: Elasticsearch * https://crate.io/ Pix Populi ---------- Pix Populi is a popular French photo sharing site. Using: Solr * http://www.pix-populi.fr/ LocalWiki ---------- LocalWiki is a tool for collaborating in local, geographic communities. It's using Haystack to power search on every LocalWiki instance. Using: Solr * http://localwiki.org/ Pitchup ------- For faceting, geo and autocomplete. Using: ??? * http://www.pitchup.com/search/ Gidsy ----- Gidsy makes it easy for anyone to organize and find exciting things to do everywhere in the world. For activity search, area pages, forums and private messages. Using: Elasticsearch * https://gidsy.com/ * https://gidsy.com/search/ * https://gidsy.com/forum/ GroundCity ---------- Groundcity is a Romanian dynamic real estate site. For real estate, forums and comments. Using: Whoosh * http://groundcity.ro/cautare/ Docket Alarm ------------ Docket Alarm allows people to search court dockets across the country. With it, you can search court dockets in the International Trade Commission (ITC), the Patent Trial and Appeal Board (PTAB) and All Federal Courts. Using: Elasticsearch * https://www.docketalarm.com/search/ITC * https://www.docketalarm.com/search/PTAB * https://www.docketalarm.com/search/dockets Educreations ------------- Educreations makes it easy for anyone to teach what they know and learn what they don't with a recordable whiteboard. Haystack is used to provide search across users and lessons. 
Using: Solr * http://www.educreations.com/browse/ django-haystack-2.8.0/example_project/000077500000000000000000000000001325051407000177465ustar00rootroot00000000000000django-haystack-2.8.0/example_project/__init__.py000066400000000000000000000000001325051407000220450ustar00rootroot00000000000000django-haystack-2.8.0/example_project/bare_bones_app/000077500000000000000000000000001325051407000227055ustar00rootroot00000000000000django-haystack-2.8.0/example_project/bare_bones_app/__init__.py000066400000000000000000000000001325051407000250040ustar00rootroot00000000000000django-haystack-2.8.0/example_project/bare_bones_app/models.py000066400000000000000000000011451325051407000245430ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime from django.db import models class Cat(models.Model): name = models.CharField(max_length=255) birth_date = models.DateField(default=datetime.date.today) bio = models.TextField(blank=True) created = models.DateTimeField(default=datetime.datetime.now) updated = models.DateTimeField(default=datetime.datetime.now) def __unicode__(self): return self.name @models.permalink def get_absolute_url(self): return ('cat_detail', [], {'id': self.id}) django-haystack-2.8.0/example_project/bare_bones_app/search_indexes.py000066400000000000000000000007611325051407000262470ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from bare_bones_app.models import Cat from haystack import indexes # For the most basic usage, you can use a subclass of # `haystack.indexes.BasicSearchIndex`, whose only requirement will be that # you create a `search/indexes/bare_bones_app/cat_text.txt` data template # for indexing. 
class CatIndex(indexes.BasicSearchIndex, indexes.Indexable): def get_model(self): return Cat django-haystack-2.8.0/example_project/regular_app/000077500000000000000000000000001325051407000222475ustar00rootroot00000000000000django-haystack-2.8.0/example_project/regular_app/__init__.py000066400000000000000000000000001325051407000243460ustar00rootroot00000000000000django-haystack-2.8.0/example_project/regular_app/models.py000066400000000000000000000024771325051407000241160ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime from django.db import models BREED_CHOICES = [ ('collie', 'Collie'), ('labrador', 'Labrador'), ('pembroke', 'Pembroke Corgi'), ('shetland', 'Shetland Sheepdog'), ('border', 'Border Collie'), ] class Dog(models.Model): breed = models.CharField(max_length=255, choices=BREED_CHOICES) name = models.CharField(max_length=255) owner_last_name = models.CharField(max_length=255, blank=True) birth_date = models.DateField(default=datetime.date.today) bio = models.TextField(blank=True) public = models.BooleanField(default=True) created = models.DateTimeField(default=datetime.datetime.now) updated = models.DateTimeField(default=datetime.datetime.now) def __unicode__(self): return self.full_name() @models.permalink def get_absolute_url(self): return ('dog_detail', [], {'id': self.id}) def full_name(self): if self.owner_last_name: return u"%s %s" % (self.name, self.owner_last_name) return self.name class Toy(models.Model): dog = models.ForeignKey(Dog, related_name='toys') name = models.CharField(max_length=60) def __unicode__(self): return u"%s's %s" % (self.dog.name, self.name) django-haystack-2.8.0/example_project/regular_app/search_indexes.py000066400000000000000000000024711325051407000256110ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from regular_app.models import Dog from haystack 
import indexes # More typical usage involves creating a subclassed `SearchIndex`. This will # provide more control over how data is indexed, generally resulting in better # search. class DogIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) # We can pull data straight out of the model via `model_attr`. breed = indexes.CharField(model_attr='breed') # Note that callables are also OK to use. name = indexes.CharField(model_attr='full_name') bio = indexes.CharField(model_attr='name') birth_date = indexes.DateField(model_attr='birth_date') # Note that we can't assign an attribute here. We'll manually prepare it instead. toys = indexes.MultiValueField() def get_model(self): return Dog def index_queryset(self, using=None): return self.get_model().objects.filter(public=True) def prepare_toys(self, obj): # Store a list of id's for filtering return [toy.id for toy in obj.toys.all()] # Alternatively, you could store the names if searching for toy names # is more useful. # return [toy.name for toy in obj.toys.all()] django-haystack-2.8.0/example_project/settings.py000066400000000000000000000025741325051407000221700ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import os from django.conf import settings SECRET_KEY = 'CHANGE ME' # All the normal settings apply. What's included here are the bits you'll have # to customize. # Add Haystack to INSTALLED_APPS. You can do this by simply placing in your list. 
INSTALLED_APPS = settings.INSTALLED_APPS + ( 'haystack', ) HAYSTACK_CONNECTIONS = { 'default': { # For Solr: 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': 'http://localhost:9001/solr/example', 'TIMEOUT': 60 * 5, 'INCLUDE_SPELLING': True, }, 'elasticsearch': { 'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine', 'URL': 'http://localhost:9200', 'INDEX_NAME': 'example_project' }, 'whoosh': { # For Whoosh: 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', 'PATH': os.path.join(os.path.dirname(__file__), 'whoosh_index'), 'INCLUDE_SPELLING': True, }, 'simple': { # For Simple: 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', }, # 'xapian': { # # For Xapian (requires the third-party install): # 'ENGINE': 'xapian_backend.XapianEngine', # 'PATH': os.path.join(os.path.dirname(__file__), 'xapian_index'), # } } django-haystack-2.8.0/example_project/templates/000077500000000000000000000000001325051407000217445ustar00rootroot00000000000000django-haystack-2.8.0/example_project/templates/search/000077500000000000000000000000001325051407000232115ustar00rootroot00000000000000django-haystack-2.8.0/example_project/templates/search/indexes/000077500000000000000000000000001325051407000246505ustar00rootroot00000000000000django-haystack-2.8.0/example_project/templates/search/indexes/bare_bones_app/000077500000000000000000000000001325051407000276075ustar00rootroot00000000000000django-haystack-2.8.0/example_project/templates/search/indexes/bare_bones_app/cat_text.txt000066400000000000000000000000421325051407000321570ustar00rootroot00000000000000{{ object.name }} {{ object.bio }}django-haystack-2.8.0/example_project/templates/search/indexes/regular_app/000077500000000000000000000000001325051407000271515ustar00rootroot00000000000000django-haystack-2.8.0/example_project/templates/search/indexes/regular_app/dog_text.txt000066400000000000000000000001741325051407000315310ustar00rootroot00000000000000{{ object.full_name }} {{ object.breed 
}} {{ object.bio }} {% for toy in object.toys.all %} {{ toy.name }} {% endfor %}django-haystack-2.8.0/haystack/000077500000000000000000000000001325051407000163745ustar00rootroot00000000000000django-haystack-2.8.0/haystack/__init__.py000066400000000000000000000050441325051407000205100ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf import settings from django.core.exceptions import ImproperlyConfigured from pkg_resources import DistributionNotFound, get_distribution, parse_version from haystack import signals from haystack.constants import DEFAULT_ALIAS from haystack.utils import loading __author__ = 'Daniel Lindsley' try: pkg_distribution = get_distribution(__name__) __version__ = pkg_distribution.version version_info = pkg_distribution.parsed_version except DistributionNotFound: __version__ = '0.0.dev0' version_info = parse_version(__version__) default_app_config = 'haystack.apps.HaystackConfig' # Help people clean up from 1.X. if hasattr(settings, 'HAYSTACK_SITECONF'): raise ImproperlyConfigured('The HAYSTACK_SITECONF setting is no longer used & can be removed.') if hasattr(settings, 'HAYSTACK_SEARCH_ENGINE'): raise ImproperlyConfigured('The HAYSTACK_SEARCH_ENGINE setting has been replaced with HAYSTACK_CONNECTIONS.') if hasattr(settings, 'HAYSTACK_ENABLE_REGISTRATIONS'): raise ImproperlyConfigured('The HAYSTACK_ENABLE_REGISTRATIONS setting is no longer used & can be removed.') if hasattr(settings, 'HAYSTACK_INCLUDE_SPELLING'): raise ImproperlyConfigured('The HAYSTACK_INCLUDE_SPELLING setting is now a per-backend setting & belongs in HAYSTACK_CONNECTIONS.') # Check the 2.X+ bits. if not hasattr(settings, 'HAYSTACK_CONNECTIONS'): raise ImproperlyConfigured('The HAYSTACK_CONNECTIONS setting is required.') if DEFAULT_ALIAS not in settings.HAYSTACK_CONNECTIONS: raise ImproperlyConfigured("The default alias '%s' must be included in the HAYSTACK_CONNECTIONS setting." 
% DEFAULT_ALIAS) # Load the connections. connections = loading.ConnectionHandler(settings.HAYSTACK_CONNECTIONS) # Just check HAYSTACK_ROUTERS setting validity, routers will be loaded lazily if hasattr(settings, 'HAYSTACK_ROUTERS'): if not isinstance(settings.HAYSTACK_ROUTERS, (list, tuple)): raise ImproperlyConfigured("The HAYSTACK_ROUTERS setting must be either a list or tuple.") # Load the router(s). connection_router = loading.ConnectionRouter() # Per-request, reset the ghetto query log. # Probably not extraordinarily thread-safe but should only matter when # DEBUG = True. def reset_search_queries(**kwargs): for conn in connections.all(): if conn: conn.reset_queries() if settings.DEBUG: from django.core import signals as django_signals django_signals.request_started.connect(reset_search_queries) django-haystack-2.8.0/haystack/admin.py000066400000000000000000000146471325051407000200520ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.contrib.admin.options import ModelAdmin, csrf_protect_m from django.contrib.admin.views.main import SEARCH_VAR, ChangeList from django.core.exceptions import PermissionDenied from django.core.paginator import InvalidPage, Paginator from django.shortcuts import render from django.utils.encoding import force_text from django.utils.translation import ungettext from haystack import connections from haystack.query import SearchQuerySet from haystack.utils import get_model_ct_tuple def list_max_show_all(changelist): """ Returns the maximum amount of results a changelist can have for the "Show all" link to be displayed in a manner compatible with both Django 1.4 and 1.3. See Django ticket #15997 for details. 
""" try: # This import is available in Django 1.3 and below from django.contrib.admin.views.main import MAX_SHOW_ALL_ALLOWED return MAX_SHOW_ALL_ALLOWED except ImportError: return changelist.list_max_show_all class SearchChangeList(ChangeList): def __init__(self, **kwargs): self.haystack_connection = kwargs.pop('haystack_connection', 'default') super(SearchChangeList, self).__init__(**kwargs) def get_results(self, request): if not SEARCH_VAR in request.GET: return super(SearchChangeList, self).get_results(request) # Note that pagination is 0-based, not 1-based. sqs = SearchQuerySet(self.haystack_connection).models(self.model).auto_query(request.GET[SEARCH_VAR]).load_all() paginator = Paginator(sqs, self.list_per_page) # Get the number of objects, with admin filters applied. result_count = paginator.count full_result_count = SearchQuerySet(self.haystack_connection).models(self.model).all().count() can_show_all = result_count <= list_max_show_all(self) multi_page = result_count > self.list_per_page # Get the list of objects to display on this page. try: result_list = paginator.page(self.page_num + 1).object_list # Grab just the Django models, since that's what everything else is # expecting. result_list = [result.object for result in result_list] except InvalidPage: result_list = () self.result_count = result_count self.full_result_count = full_result_count self.result_list = result_list self.can_show_all = can_show_all self.multi_page = multi_page self.paginator = paginator class SearchModelAdminMixin(object): # haystack connection to use for searching haystack_connection = 'default' @csrf_protect_m def changelist_view(self, request, extra_context=None): if not self.has_change_permission(request, None): raise PermissionDenied if not SEARCH_VAR in request.GET: # Do the usual song and dance. return super(SearchModelAdminMixin, self).changelist_view(request, extra_context) # Do a search of just this model and populate a Changelist with the # returned bits. 
if not self.model in connections[self.haystack_connection].get_unified_index().get_indexed_models(): # Oops. That model isn't being indexed. Return the usual # behavior instead. return super(SearchModelAdminMixin, self).changelist_view(request, extra_context) # So. Much. Boilerplate. # Why copy-paste a few lines when you can copy-paste TONS of lines? list_display = list(self.list_display) kwargs = { 'haystack_connection': self.haystack_connection, 'request': request, 'model': self.model, 'list_display': list_display, 'list_display_links': self.list_display_links, 'list_filter': self.list_filter, 'date_hierarchy': self.date_hierarchy, 'search_fields': self.search_fields, 'list_select_related': self.list_select_related, 'list_per_page': self.list_per_page, 'list_editable': self.list_editable, 'model_admin': self } # Django 1.4 compatibility. if hasattr(self, 'list_max_show_all'): kwargs['list_max_show_all'] = self.list_max_show_all changelist = SearchChangeList(**kwargs) formset = changelist.formset = None media = self.media # Build the action form and populate it with available actions. 
# Check actions to see if any are available on this changelist actions = self.get_actions(request) if actions: action_form = self.action_form(auto_id=None) action_form.fields['action'].choices = self.get_action_choices(request) else: action_form = None selection_note = ungettext('0 of %(count)d selected', 'of %(count)d selected', len(changelist.result_list)) selection_note_all = ungettext('%(total_count)s selected', 'All %(total_count)s selected', changelist.result_count) context = { 'module_name': force_text(self.model._meta.verbose_name_plural), 'selection_note': selection_note % {'count': len(changelist.result_list)}, 'selection_note_all': selection_note_all % {'total_count': changelist.result_count}, 'title': changelist.title, 'is_popup': changelist.is_popup, 'cl': changelist, 'media': media, 'has_add_permission': self.has_add_permission(request), # More Django 1.4 compatibility 'root_path': getattr(self.admin_site, 'root_path', None), 'app_label': self.model._meta.app_label, 'action_form': action_form, 'actions_on_top': self.actions_on_top, 'actions_on_bottom': self.actions_on_bottom, 'actions_selection_counter': getattr(self, 'actions_selection_counter', 0), } context.update(extra_context or {}) request.current_app = self.admin_site.name app_name, model_name = get_model_ct_tuple(self.model) return render(request, self.change_list_template or [ 'admin/%s/%s/change_list.html' % (app_name, model_name), 'admin/%s/change_list.html' % app_name, 'admin/change_list.html' ], context) class SearchModelAdmin(SearchModelAdminMixin, ModelAdmin): pass django-haystack-2.8.0/haystack/apps.py000066400000000000000000000016171325051407000177160ustar00rootroot00000000000000from __future__ import unicode_literals import logging from django.apps import AppConfig from django.conf import settings from haystack import connection_router, connections from haystack.utils import loading class HaystackConfig(AppConfig): name = 'haystack' signal_processor = None stream = None def 
ready(self): # Setup default logging. log = logging.getLogger('haystack') self.stream = logging.StreamHandler() self.stream.setLevel(logging.INFO) log.addHandler(self.stream) # Setup the signal processor. if not self.signal_processor: signal_processor_path = getattr(settings, 'HAYSTACK_SIGNAL_PROCESSOR', 'haystack.signals.BaseSignalProcessor') signal_processor_class = loading.import_class(signal_processor_path) self.signal_processor = signal_processor_class(connections, connection_router) django-haystack-2.8.0/haystack/backends/000077500000000000000000000000001325051407000201465ustar00rootroot00000000000000django-haystack-2.8.0/haystack/backends/__init__.py000066400000000000000000001067711325051407000222730ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import unicode_literals import copy from copy import deepcopy from time import time from django.conf import settings from django.db.models import Q from django.db.models.base import ModelBase from django.utils import six from django.utils import tree from django.utils.encoding import force_text from haystack.constants import VALID_FILTERS, FILTER_SEPARATOR, DEFAULT_ALIAS from haystack.exceptions import MoreLikeThisError, FacetingError from haystack.models import SearchResult from haystack.utils.loading import UnifiedIndex from haystack.utils import get_model_ct VALID_GAPS = ['year', 'month', 'day', 'hour', 'minute', 'second'] SPELLING_SUGGESTION_HAS_NOT_RUN = object() def log_query(func): """ A decorator for pseudo-logging search queries. Used in the ``SearchBackend`` to wrap the ``search`` method. 
""" def wrapper(obj, query_string, *args, **kwargs): start = time() try: return func(obj, query_string, *args, **kwargs) finally: stop = time() if settings.DEBUG: from haystack import connections connections[obj.connection_alias].queries.append({ 'query_string': query_string, 'additional_args': args, 'additional_kwargs': kwargs, 'time': "%.3f" % (stop - start), 'start': start, 'stop': stop, }) return wrapper class EmptyResults(object): hits = 0 docs = [] def __len__(self): return 0 def __getitem__(self, k): if isinstance(k, slice): return [] else: raise IndexError("It's not here.") class BaseSearchBackend(object): """ Abstract search engine base class. """ # Backends should include their own reserved words/characters. RESERVED_WORDS = [] RESERVED_CHARACTERS = [] def __init__(self, connection_alias, **connection_options): self.connection_alias = connection_alias self.timeout = connection_options.get('TIMEOUT', 10) self.include_spelling = connection_options.get('INCLUDE_SPELLING', False) self.batch_size = connection_options.get('BATCH_SIZE', 1000) self.silently_fail = connection_options.get('SILENTLY_FAIL', True) self.distance_available = connection_options.get('DISTANCE_AVAILABLE', False) def update(self, index, iterable, commit=True): """ Updates the backend when given a SearchIndex and a collection of documents. This method MUST be implemented by each backend, as it will be highly specific to each one. """ raise NotImplementedError def remove(self, obj_or_string): """ Removes a document/object from the backend. Can be either a model instance or the identifier (i.e. ``app_name.model_name.id``) in the event the object no longer exists. This method MUST be implemented by each backend, as it will be highly specific to each one. """ raise NotImplementedError def clear(self, models=None, commit=True): """ Clears the backend of all documents/objects for a collection of models. This method MUST be implemented by each backend, as it will be highly specific to each one. 
""" raise NotImplementedError @log_query def search(self, query_string, **kwargs): """ Takes a query to search on and returns dictionary. The query should be a string that is appropriate syntax for the backend. The returned dictionary should contain the keys 'results' and 'hits'. The 'results' value should be an iterable of populated SearchResult objects. The 'hits' should be an integer count of the number of matched results the search backend found. This method MUST be implemented by each backend, as it will be highly specific to each one. """ raise NotImplementedError def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None, within=None, dwithin=None, distance_point=None, models=None, limit_to_registered_models=None, result_class=None, **extra_kwargs): # A convenience method most backends should include in order to make # extension easier. raise NotImplementedError def prep_value(self, value): """ Hook to give the backend a chance to prep an attribute value before sending it to the search engine. By default, just force it to unicode. """ return force_text(value) def more_like_this(self, model_instance, additional_query_string=None, result_class=None): """ Takes a model object and returns results the backend thinks are similar. This method MUST be implemented by each backend, as it will be highly specific to each one. """ raise NotImplementedError("Subclasses must provide a way to fetch similar record via the 'more_like_this' method if supported by the backend.") def extract_file_contents(self, file_obj): """ Hook to allow backends which support rich-content types such as PDF, Word, etc. 
extraction to process the provided file object and return the contents for indexing Returns None if metadata cannot be extracted; otherwise returns a dictionary containing at least two keys: :contents: Extracted full-text content, if applicable :metadata: key:value pairs of text strings """ raise NotImplementedError("Subclasses must provide a way to extract metadata via the 'extract' method if supported by the backend.") def build_schema(self, fields): """ Takes a dictionary of fields and returns schema information. This method MUST be implemented by each backend, as it will be highly specific to each one. """ raise NotImplementedError("Subclasses must provide a way to build their schema.") def build_models_list(self): """ Builds a list of models for searching. The ``search`` method should use this and the ``django_ct`` field to narrow the results (unless the user indicates not to). This helps ignore any results that are not currently handled models and ensures consistent caching. """ from haystack import connections models = [] for model in connections[self.connection_alias].get_unified_index().get_indexed_models(): models.append(get_model_ct(model)) return models # Alias for easy loading within SearchQuery objects. SearchBackend = BaseSearchBackend class SearchNode(tree.Node): """ Manages an individual condition within a query. Most often, this will be a lookup to ensure that a certain word or phrase appears in the documents being indexed. However, it also supports filtering types (such as 'lt', 'gt', 'in' and others) for more complex lookups. This object creates a tree, with children being a list of either more ``SQ`` objects or the expressions/values themselves. """ AND = 'AND' OR = 'OR' default = AND # Start compat. Django 1.6 changed how ``tree.Node`` works, so we're going # to patch back in the original implementation until time to rewrite this # presents itself. # See https://github.com/django/django/commit/d3f00bd. 
def __init__(self, children=None, connector=None, negated=False): """ Constructs a new Node. If no connector is given, the default will be used. Warning: You probably don't want to pass in the 'negated' parameter. It is NOT the same as constructing a node and calling negate() on the result. """ self.children = children and children[:] or [] self.connector = connector or self.default self.subtree_parents = [] self.negated = negated # We need this because of django.db.models.query_utils.Q. Q. __init__() is # problematic, but it is a natural Node subclass in all other respects. def _new_instance(cls, children=None, connector=None, negated=False): """ This is called to create a new instance of this class when we need new Nodes (or subclasses) in the internal code in this class. Normally, it just shadows __init__(). However, subclasses with an __init__ signature that is not an extension of Node.__init__ might need to implement this method to allow a Node to create a new instance of them (if they have any extra setting up to do). """ obj = SearchNode(children, connector, negated) obj.__class__ = cls return obj _new_instance = classmethod(_new_instance) def __str__(self): if self.negated: return '(NOT (%s: %s))' % (self.connector, ', '.join([str(c) for c in self.children])) return '(%s: %s)' % (self.connector, ', '.join([str(c) for c in self.children])) def __deepcopy__(self, memodict): """ Utility method used by copy.deepcopy(). """ obj = SearchNode(connector=self.connector, negated=self.negated) obj.__class__ = self.__class__ obj.children = copy.deepcopy(self.children, memodict) obj.subtree_parents = copy.deepcopy(self.subtree_parents, memodict) return obj def __len__(self): """ The size of a node if the number of children it has. """ return len(self.children) def __bool__(self): """ For truth value testing. 
""" return bool(self.children) def __nonzero__(self): # Python 2 compatibility return type(self).__bool__(self) def __contains__(self, other): """ Returns True is 'other' is a direct child of this instance. """ return other in self.children def add(self, node, conn_type): """ Adds a new node to the tree. If the conn_type is the same as the root's current connector type, the node is added to the first level. Otherwise, the whole tree is pushed down one level and a new root connector is created, connecting the existing tree and the new node. """ if node in self.children and conn_type == self.connector: return if len(self.children) < 2: self.connector = conn_type if self.connector == conn_type: if isinstance(node, SearchNode) and (node.connector == conn_type or len(node) == 1): self.children.extend(node.children) else: self.children.append(node) else: obj = self._new_instance(self.children, self.connector, self.negated) self.connector = conn_type self.children = [obj, node] def negate(self): """ Negate the sense of the root connector. This reorganises the children so that the current node has a single child: a negated node containing all the previous children. This slightly odd construction makes adding new children behave more intuitively. Interpreting the meaning of this negate is up to client code. This method is useful for implementing "not" arrangements. """ self.children = [self._new_instance(self.children, self.connector, not self.negated)] self.connector = self.default def start_subtree(self, conn_type): """ Sets up internal state so that new nodes are added to a subtree of the current node. The conn_type specifies how the sub-tree is joined to the existing children. 
""" if len(self.children) == 1: self.connector = conn_type elif self.connector != conn_type: self.children = [self._new_instance(self.children, self.connector, self.negated)] self.connector = conn_type self.negated = False self.subtree_parents.append(self.__class__(self.children, self.connector, self.negated)) self.connector = self.default self.negated = False self.children = [] def end_subtree(self): """ Closes off the most recently unmatched start_subtree() call. This puts the current state into a node of the parent tree and returns the current instances state to be the parent. """ obj = self.subtree_parents.pop() node = self.__class__(self.children, self.connector) self.connector = obj.connector self.negated = obj.negated self.children = obj.children self.children.append(node) # End compat. def __repr__(self): return '' % (self.connector, self.as_query_string(self._repr_query_fragment_callback)) def _repr_query_fragment_callback(self, field, filter_type, value): if six.PY3: value = force_text(value) else: value = force_text(value).encode('utf8') return "%s%s%s=%s" % (field, FILTER_SEPARATOR, filter_type, value) def as_query_string(self, query_fragment_callback): """ Produces a portion of the search query from the current SQ and its children. 
""" result = [] for child in self.children: if hasattr(child, 'as_query_string'): result.append(child.as_query_string(query_fragment_callback)) else: expression, value = child field, filter_type = self.split_expression(expression) result.append(query_fragment_callback(field, filter_type, value)) conn = ' %s ' % self.connector query_string = conn.join(result) if query_string: if self.negated: query_string = 'NOT (%s)' % query_string elif len(self.children) != 1: query_string = '(%s)' % query_string return query_string def split_expression(self, expression): """Parses an expression and determines the field and filter type.""" parts = expression.split(FILTER_SEPARATOR) field = parts[0] if len(parts) == 1 or parts[-1] not in VALID_FILTERS: filter_type = 'content' else: filter_type = parts.pop() return (field, filter_type) class SQ(Q, SearchNode): """ Manages an individual condition within a query. Most often, this will be a lookup to ensure that a certain word or phrase appears in the documents being indexed. However, it also supports filtering types (such as 'lt', 'gt', 'in' and others) for more complex lookups. """ pass class BaseSearchQuery(object): """ A base class for handling the query itself. This class acts as an intermediary between the ``SearchQuerySet`` and the ``SearchBackend`` itself. The ``SearchQuery`` object maintains a tree of ``SQ`` objects. Each ``SQ`` object supports what field it looks up against, what kind of lookup (i.e. the __'s), what value it's looking for, if it's a AND/OR/NOT and tracks any children it may have. The ``SearchQuery.build_query`` method starts with the root of the tree, building part of the final query at each node until the full final query is ready for the ``SearchBackend``. Backends should extend this class and provide implementations for ``build_query_fragment``, ``clean`` and ``run``. See the ``solr`` backend for an example implementation. 
""" def __init__(self, using=DEFAULT_ALIAS): self.query_filter = SearchNode() self.order_by = [] self.models = set() self.boost = {} self.start_offset = 0 self.end_offset = None self.highlight = False self.facets = {} self.date_facets = {} self.query_facets = [] self.narrow_queries = set() #: If defined, fields should be a list of field names - no other values #: will be retrieved so the caller must be careful to include django_ct #: and django_id when using code which expects those to be included in #: the results self.fields = [] # Geospatial-related information self.within = {} self.dwithin = {} self.distance_point = {} # Internal. self._raw_query = None self._raw_query_params = {} self._more_like_this = False self._mlt_instance = None self._results = None self._hit_count = None self._facet_counts = None self._stats = None self._spelling_suggestion = SPELLING_SUGGESTION_HAS_NOT_RUN self.spelling_query = None self.result_class = SearchResult self.stats = {} from haystack import connections self._using = using self.backend = connections[self._using].get_backend() def __str__(self): return self.build_query() def __getstate__(self): """For pickling.""" obj_dict = self.__dict__.copy() del(obj_dict['backend']) return obj_dict def __setstate__(self, obj_dict): """For unpickling.""" from haystack import connections self.__dict__.update(obj_dict) self.backend = connections[self._using].get_backend() def has_run(self): """Indicates if any query has been been run.""" return None not in (self._results, self._hit_count) def build_params(self, spelling_query=None): """Generates a list of params to use when searching.""" kwargs = { 'start_offset': self.start_offset, } if self.order_by: kwargs['sort_by'] = self.order_by if self.end_offset is not None: kwargs['end_offset'] = self.end_offset if self.highlight: kwargs['highlight'] = self.highlight if self.facets: kwargs['facets'] = self.facets if self.date_facets: kwargs['date_facets'] = self.date_facets if self.query_facets: 
kwargs['query_facets'] = self.query_facets if self.narrow_queries: kwargs['narrow_queries'] = self.narrow_queries if spelling_query: kwargs['spelling_query'] = spelling_query elif self.spelling_query: kwargs['spelling_query'] = self.spelling_query if self.boost: kwargs['boost'] = self.boost if self.within: kwargs['within'] = self.within if self.dwithin: kwargs['dwithin'] = self.dwithin if self.distance_point: kwargs['distance_point'] = self.distance_point if self.result_class: kwargs['result_class'] = self.result_class if self.fields: kwargs['fields'] = self.fields if self.models: kwargs['models'] = self.models return kwargs def run(self, spelling_query=None, **kwargs): """Builds and executes the query. Returns a list of search results.""" final_query = self.build_query() search_kwargs = self.build_params(spelling_query=spelling_query) if kwargs: search_kwargs.update(kwargs) results = self.backend.search(final_query, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) self._facet_counts = self.post_process_facets(results) self._spelling_suggestion = results.get('spelling_suggestion', None) def run_mlt(self, **kwargs): """ Executes the More Like This. Returns a list of search results similar to the provided document (and optionally query). """ if self._more_like_this is False or self._mlt_instance is None: raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") search_kwargs = { 'result_class': self.result_class, } if self.models: search_kwargs['models'] = self.models if kwargs: search_kwargs.update(kwargs) additional_query_string = self.build_query() results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) def run_raw(self, **kwargs): """Executes a raw query. 
        Returns a list of search results."""
        search_kwargs = self.build_params()
        # Raw-query params (set by raw_search()) override the built ones.
        search_kwargs.update(self._raw_query_params)

        if kwargs:
            search_kwargs.update(kwargs)

        results = self.backend.search(self._raw_query, **search_kwargs)
        self._results = results.get('results', [])
        self._hit_count = results.get('hits', 0)
        self._facet_counts = results.get('facets', {})
        self._spelling_suggestion = results.get('spelling_suggestion', None)

    def get_count(self):
        """
        Returns the number of results the backend found for the query.

        If the query has not been run, this will execute the query and store
        the results.
        """
        if self._hit_count is None:
            # Limit the slice to 1 so we get a count without consuming
            # everything.
            # NOTE: this mutates self.end_offset as a side effect when no
            # limit was previously set.
            if not self.end_offset:
                self.end_offset = 1

            if self._more_like_this:
                # Special case for MLT.
                self.run_mlt()
            elif self._raw_query:
                # Special case for raw queries.
                self.run_raw()
            else:
                self.run()

        return self._hit_count

    def get_results(self, **kwargs):
        """
        Returns the results received from the backend.

        If the query has not been run, this will execute the query and store
        the results.
        """
        if self._results is None:
            # Dispatch to the right executor, mirroring get_count().
            if self._more_like_this:
                # Special case for MLT.
                self.run_mlt(**kwargs)
            elif self._raw_query:
                # Special case for raw queries.
                self.run_raw(**kwargs)
            else:
                self.run(**kwargs)

        return self._results

    def get_facet_counts(self):
        """
        Returns the facet counts received from the backend.

        If the query has not been run, this will execute the query and store
        the results.
        """
        if self._facet_counts is None:
            self.run()

        return self._facet_counts

    def get_stats(self):
        """
        Returns the stats received from the backend.

        If the query has not been run, this will execute the query and store
        the results
        """
        if self._stats is None:
            self.run()

        return self._stats

    def set_spelling_query(self, spelling_query):
        # Stores an explicit spelling query; picked up by build_params().
        self.spelling_query = spelling_query

    def get_spelling_suggestion(self, preferred_query=None):
        """
        Returns the spelling suggestion received from the backend.
If the query has not been run, this will execute the query and store the results. """ if self._spelling_suggestion is SPELLING_SUGGESTION_HAS_NOT_RUN: self.run(spelling_query=preferred_query) return self._spelling_suggestion def boost_fragment(self, boost_word, boost_value): """Generates query fragment for boosting a single word/value pair.""" return "%s^%s" % (boost_word, boost_value) def matching_all_fragment(self): """Generates the query that matches all documents.""" return '*' def build_query(self): """ Interprets the collected query metadata and builds the final query to be sent to the backend. """ final_query = self.query_filter.as_query_string(self.build_query_fragment) if not final_query: # Match all. final_query = self.matching_all_fragment() if self.boost: boost_list = [] for boost_word, boost_value in self.boost.items(): boost_list.append(self.boost_fragment(boost_word, boost_value)) final_query = "%s %s" % (final_query, " ".join(boost_list)) return final_query def combine(self, rhs, connector=SQ.AND): if connector == SQ.AND: self.add_filter(rhs.query_filter) elif connector == SQ.OR: self.add_filter(rhs.query_filter, use_or=True) # Methods for backends to implement. def build_query_fragment(self, field, filter_type, value): """ Generates a query fragment from a field, filter type and a value. Must be implemented in backends as this will be highly backend specific. """ raise NotImplementedError("Subclasses must provide a way to generate query fragments via the 'build_query_fragment' method.") # Standard methods to alter the query. def clean(self, query_fragment): """ Provides a mechanism for sanitizing user input before presenting the value to the backend. A basic (override-able) implementation is provided. 
        """
        # Non-strings (e.g. numbers, dates) pass through untouched.
        if not isinstance(query_fragment, six.string_types):
            return query_fragment

        words = query_fragment.split()
        cleaned_words = []

        for word in words:
            if word in self.backend.RESERVED_WORDS:
                # Lower-casing a reserved word (AND/OR/...) neutralizes it.
                word = word.replace(word, word.lower())

            for char in self.backend.RESERVED_CHARACTERS:
                word = word.replace(char, '\\%s' % char)

            cleaned_words.append(word)

        return ' '.join(cleaned_words)

    def build_not_query(self, query_string):
        # Parenthesize multi-word strings so NOT applies to the whole phrase.
        if ' ' in query_string:
            query_string = "(%s)" % query_string

        return u"NOT %s" % query_string

    def build_exact_query(self, query_string):
        return u'"%s"' % query_string

    def add_filter(self, query_filter, use_or=False):
        """
        Adds a SQ to the current query.
        """
        if use_or:
            connector = SQ.OR
        else:
            connector = SQ.AND

        # If the incoming filter joins with a different connector than ours
        # and has multiple children, wrap the existing tree in a subtree so
        # grouping (parentheses) is preserved in the final query string.
        if self.query_filter and query_filter.connector != connector and len(query_filter) > 1:
            self.query_filter.start_subtree(connector)
            subtree = True
        else:
            subtree = False

        for child in query_filter.children:
            if isinstance(child, tree.Node):
                # Nested SQ: recurse inside its own subtree.
                self.query_filter.start_subtree(connector)
                self.add_filter(child)
                self.query_filter.end_subtree()
            else:
                expression, value = child
                self.query_filter.add((expression, value), connector)

            # Subsequent children join with the incoming filter's connector.
            connector = query_filter.connector

        if query_filter.negated:
            self.query_filter.negate()

        if subtree:
            self.query_filter.end_subtree()

    def add_order_by(self, field):
        """Orders the search result by a field."""
        self.order_by.append(field)

    def clear_order_by(self):
        """
        Clears out all ordering that has been already added, reverting the
        query to relevancy.
        """
        self.order_by = []

    def add_model(self, model):
        """
        Restricts the query requiring matches in the given model.

        This builds upon previous additions, so you can limit to multiple models
        by chaining this method several times.
""" if not isinstance(model, ModelBase): raise AttributeError('The model being added to the query must derive from Model.') self.models.add(model) def set_limits(self, low=None, high=None): """Restricts the query by altering either the start, end or both offsets.""" if low is not None: self.start_offset = int(low) if high is not None: self.end_offset = int(high) def clear_limits(self): """Clears any existing limits.""" self.start_offset, self.end_offset = 0, None def add_boost(self, term, boost_value): """Adds a boosted term and the amount to boost it to the query.""" self.boost[term] = boost_value def raw_search(self, query_string, **kwargs): """ Runs a raw query (no parsing) against the backend. This method causes the SearchQuery to ignore the standard query generating facilities, running only what was provided instead. Note that any kwargs passed along will override anything provided to the rest of the ``SearchQuerySet``. """ self._raw_query = query_string self._raw_query_params = kwargs def more_like_this(self, model_instance): """ Allows backends with support for "More Like This" to return results similar to the provided instance. 
        """
        self._more_like_this = True
        self._mlt_instance = model_instance

    def add_stats_query(self, stats_field, stats_facets):
        """Adds stats and stats_facets queries for the Solr backend."""
        self.stats[stats_field] = stats_facets

    def add_highlight(self, **kwargs):
        """Adds highlighting to the search results."""
        # Backends receive either a kwargs dict of highlight options or
        # the bare flag True when no options were given.
        self.highlight = kwargs or True

    def add_within(self, field, point_1, point_2):
        """Adds bounding box parameters to search query."""
        # Deferred import so Haystack can be used without geo dependencies.
        from haystack.utils.geo import ensure_point
        self.within = {
            'field': field,
            'point_1': ensure_point(point_1),
            'point_2': ensure_point(point_2),
        }

    def add_dwithin(self, field, point, distance):
        """Adds radius-based parameters to search query."""
        # Deferred import so Haystack can be used without geo dependencies.
        from haystack.utils.geo import ensure_point, ensure_distance
        self.dwithin = {
            'field': field,
            'point': ensure_point(point),
            'distance': ensure_distance(distance),
        }

    def add_distance(self, field, point):
        """
        Denotes that results should include distance measurements from the
        point passed in.
        """
        from haystack.utils.geo import ensure_point
        self.distance_point = {
            'field': field,
            'point': ensure_point(point),
        }

    def add_field_facet(self, field, **options):
        """Adds a regular facet on a field."""
        from haystack import connections
        # Facets are keyed on the (possibly decorated) facet fieldname.
        field_name = connections[self._using].get_unified_index().get_facet_fieldname(field)
        self.facets[field_name] = options.copy()

    def add_date_facet(self, field, start_date, end_date, gap_by, gap_amount=1):
        """Adds a date-based facet on a field."""
        from haystack import connections
        if gap_by not in VALID_GAPS:
            raise FacetingError("The gap_by ('%s') must be one of the following: %s."
% (gap_by, ', '.join(VALID_GAPS))) details = { 'start_date': start_date, 'end_date': end_date, 'gap_by': gap_by, 'gap_amount': gap_amount, } self.date_facets[connections[self._using].get_unified_index().get_facet_fieldname(field)] = details def add_query_facet(self, field, query): """Adds a query facet on a field.""" from haystack import connections self.query_facets.append((connections[self._using].get_unified_index().get_facet_fieldname(field), query)) def add_narrow_query(self, query): """ Narrows a search to a subset of all documents per the query. Generally used in conjunction with faceting. """ self.narrow_queries.add(query) def set_result_class(self, klass): """ Sets the result class to use for results. Overrides any previous usages. If ``None`` is provided, Haystack will revert back to the default ``SearchResult`` object. """ if klass is None: klass = SearchResult self.result_class = klass def post_process_facets(self, results): # Handle renaming the facet fields. Undecorate and all that. from haystack import connections revised_facets = {} field_data = connections[self._using].get_unified_index().all_searchfields() for facet_type, field_details in results.get('facets', {}).items(): temp_facets = {} for field, field_facets in field_details.items(): fieldname = field if field in field_data and hasattr(field_data[field], 'get_facet_for_name'): fieldname = field_data[field].get_facet_for_name() temp_facets[fieldname] = field_facets revised_facets[facet_type] = temp_facets return revised_facets def using(self, using=None): """ Allows for overriding which connection should be used. This disables the use of routers when performing the query. If ``None`` is provided, it has no effect on what backend is used. """ return self._clone(using=using) def _reset(self): """ Resets the instance's internal state to appear as though no query has been run before. Only need to tweak a few variables we check. 
""" self._results = None self._hit_count = None self._facet_counts = None self._spelling_suggestion = SPELLING_SUGGESTION_HAS_NOT_RUN def _clone(self, klass=None, using=None): if using is None: using = self._using else: from haystack import connections klass = connections[using].query if klass is None: klass = self.__class__ clone = klass(using=using) clone.query_filter = deepcopy(self.query_filter) clone.order_by = self.order_by[:] clone.models = self.models.copy() clone.boost = self.boost.copy() clone.highlight = self.highlight clone.stats = self.stats.copy() clone.facets = self.facets.copy() clone.date_facets = self.date_facets.copy() clone.query_facets = self.query_facets[:] clone.narrow_queries = self.narrow_queries.copy() clone.start_offset = self.start_offset clone.end_offset = self.end_offset clone.result_class = self.result_class clone.within = self.within.copy() clone.dwithin = self.dwithin.copy() clone.distance_point = self.distance_point.copy() clone._raw_query = self._raw_query clone._raw_query_params = self._raw_query_params clone.spelling_query = self.spelling_query clone._more_like_this = self._more_like_this clone._mlt_instance = self._mlt_instance return clone class BaseEngine(object): backend = BaseSearchBackend query = BaseSearchQuery unified_index = UnifiedIndex def __init__(self, using=None): if using is None: using = DEFAULT_ALIAS self.using = using self.options = settings.HAYSTACK_CONNECTIONS.get(self.using, {}) self.queries = [] self._index = None self._backend = None def get_backend(self): if self._backend is None: self._backend = self.backend(self.using, **self.options) return self._backend def reset_sessions(self): """Reset any transient connections, file handles, etc.""" self._backend = None def get_query(self): return self.query(using=self.using) def reset_queries(self): del self.queries[:] def get_unified_index(self): if self._index is None: self._index = self.unified_index(self.options.get('EXCLUDED_INDEXES', [])) return self._index 
django-haystack-2.8.0/haystack/backends/elasticsearch2_backend.py000066400000000000000000000337231325051407000250730ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals import datetime from django.conf import settings from haystack.backends import BaseEngine from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend, ElasticsearchSearchQuery from haystack.constants import DJANGO_CT from haystack.exceptions import MissingDependency from haystack.utils import get_identifier, get_model_ct from haystack.utils import log as logging try: import elasticsearch if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)): raise ImportError from elasticsearch.helpers import bulk, scan except ImportError: raise MissingDependency("The 'elasticsearch2' backend requires the \ installation of 'elasticsearch>=2.0.0,<3.0.0'. \ Please refer to the documentation.") class Elasticsearch2SearchBackend(ElasticsearchSearchBackend): def __init__(self, connection_alias, **connection_options): super(Elasticsearch2SearchBackend, self).__init__(connection_alias, **connection_options) self.content_field_name = None def clear(self, models=None, commit=True): """ Clears the backend of all documents/objects for a collection of models. :param models: List or tuple of models to clear. :param commit: Not used. 
""" if models is not None: assert isinstance(models, (list, tuple)) try: if models is None: self.conn.indices.delete(index=self.index_name, ignore=404) self.setup_complete = False self.existing_mapping = {} self.content_field_name = None else: models_to_delete = [] for model in models: models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model))) # Delete using scroll API query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}} generator = scan(self.conn, query=query, index=self.index_name, doc_type='modelresult') actions = ({ '_op_type': 'delete', '_id': doc['_id'], } for doc in generator) bulk(self.conn, actions=actions, index=self.index_name, doc_type='modelresult') self.conn.indices.refresh(index=self.index_name) except elasticsearch.TransportError as e: if not self.silently_fail: raise if models is not None: self.log.error("Failed to clear Elasticsearch index of models '%s': %s", ','.join(models_to_delete), e, exc_info=True) else: self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True) def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None, within=None, dwithin=None, distance_point=None, models=None, limit_to_registered_models=None, result_class=None): kwargs = super(Elasticsearch2SearchBackend, self).build_search_kwargs(query_string, sort_by, start_offset, end_offset, fields, highlight, spelling_query=spelling_query, within=within, dwithin=dwithin, distance_point=distance_point, models=models, limit_to_registered_models= limit_to_registered_models, result_class=result_class) filters = [] if start_offset is not None: kwargs['from'] = start_offset if end_offset is not None: kwargs['size'] = end_offset - start_offset if narrow_queries is None: narrow_queries = set() if facets is not None: kwargs.setdefault('aggs', {}) for facet_fieldname, extra_options in 
facets.items(): facet_options = { 'meta': { '_type': 'terms', }, 'terms': { 'field': facet_fieldname, } } if 'order' in extra_options: facet_options['meta']['order'] = extra_options.pop('order') # Special cases for options applied at the facet level (not the terms level). if extra_options.pop('global_scope', False): # Renamed "global_scope" since "global" is a python keyword. facet_options['global'] = True if 'facet_filter' in extra_options: facet_options['facet_filter'] = extra_options.pop('facet_filter') facet_options['terms'].update(extra_options) kwargs['aggs'][facet_fieldname] = facet_options if date_facets is not None: kwargs.setdefault('aggs', {}) for facet_fieldname, value in date_facets.items(): # Need to detect on gap_by & only add amount if it's more than one. interval = value.get('gap_by').lower() # Need to detect on amount (can't be applied on months or years). if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'): # Just the first character is valid for use. 
interval = "%s%s" % (value['gap_amount'], interval[:1]) kwargs['aggs'][facet_fieldname] = { 'meta': { '_type': 'date_histogram', }, 'date_histogram': { 'field': facet_fieldname, 'interval': interval, }, 'aggs': { facet_fieldname: { 'date_range': { 'field': facet_fieldname, 'ranges': [ { 'from': self._from_python(value.get('start_date')), 'to': self._from_python(value.get('end_date')), } ] } } } } if query_facets is not None: kwargs.setdefault('aggs', {}) for facet_fieldname, value in query_facets: kwargs['aggs'][facet_fieldname] = { 'meta': { '_type': 'query', }, 'filter': { 'query_string': { 'query': value, } }, } for q in narrow_queries: filters.append({ 'query_string': { 'query': q } }) # if we want to filter, change the query type to filteres if filters: kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}} filtered = kwargs["query"]["filtered"] if 'filter' in filtered: if "bool" in filtered["filter"].keys(): another_filters = kwargs['query']['filtered']['filter']['bool']['must'] else: another_filters = [kwargs['query']['filtered']['filter']] else: another_filters = filters if len(another_filters) == 1: kwargs['query']['filtered']["filter"] = another_filters[0] else: kwargs['query']['filtered']["filter"] = {"bool": {"must": another_filters}} return kwargs def more_like_this(self, model_instance, additional_query_string=None, start_offset=0, end_offset=None, models=None, limit_to_registered_models=None, result_class=None, **kwargs): from haystack import connections if not self.setup_complete: self.setup() # Deferred models will have a different class ("RealClass_Deferred_fieldname") # which won't be in our registry: model_klass = model_instance._meta.concrete_model index = connections[self.connection_alias].get_unified_index().get_index(model_klass) field_name = index.get_content_field() params = {} if start_offset is not None: params['from_'] = start_offset if end_offset is not None: params['size'] = end_offset - start_offset doc_id = 
get_identifier(model_instance) try: # More like this Query # https://www.elastic.co/guide/en/elasticsearch/reference/2.2/query-dsl-mlt-query.html mlt_query = { 'query': { 'more_like_this': { 'fields': [field_name], 'like': [{ "_id": doc_id }] } } } narrow_queries = [] if additional_query_string and additional_query_string != '*:*': additional_filter = { "query": { "query_string": { "query": additional_query_string } } } narrow_queries.append(additional_filter) if limit_to_registered_models is None: limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) if models and len(models): model_choices = sorted(get_model_ct(model) for model in models) elif limit_to_registered_models: # Using narrow queries, limit the results to only models handled # with the current routers. model_choices = self.build_models_list() else: model_choices = [] if len(model_choices) > 0: model_filter = {"terms": {DJANGO_CT: model_choices}} narrow_queries.append(model_filter) if len(narrow_queries) > 0: mlt_query = { "query": { "filtered": { 'query': mlt_query['query'], 'filter': { 'bool': { 'must': list(narrow_queries) } } } } } raw_results = self.conn.search( body=mlt_query, index=self.index_name, doc_type='modelresult', _source=True, **params) except elasticsearch.TransportError as e: if not self.silently_fail: raise self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s", doc_id, e, exc_info=True) raw_results = {} return self._process_results(raw_results, result_class=result_class) def _process_results(self, raw_results, highlight=False, result_class=None, distance_point=None, geo_sort=False): results = super(Elasticsearch2SearchBackend, self)._process_results(raw_results, highlight, result_class, distance_point, geo_sort) facets = {} if 'aggregations' in raw_results: facets = { 'fields': {}, 'dates': {}, 'queries': {}, } for facet_fieldname, facet_info in raw_results['aggregations'].items(): facet_type = 
facet_info['meta']['_type'] if facet_type == 'terms': facets['fields'][facet_fieldname] = [(individual['key'], individual['doc_count']) for individual in facet_info['buckets']] if 'order' in facet_info['meta']: if facet_info['meta']['order'] == 'reverse_count': srt = sorted(facets['fields'][facet_fieldname], key=lambda x: x[1]) facets['fields'][facet_fieldname] = srt elif facet_type == 'date_histogram': # Elasticsearch provides UTC timestamps with an extra three # decimals of precision, which datetime barfs on. facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['key'] / 1000), individual['doc_count']) for individual in facet_info['buckets']] elif facet_type == 'query': facets['queries'][facet_fieldname] = facet_info['doc_count'] results['facets'] = facets return results class Elasticsearch2SearchQuery(ElasticsearchSearchQuery): pass class Elasticsearch2SearchEngine(BaseEngine): backend = Elasticsearch2SearchBackend query = Elasticsearch2SearchQuery django-haystack-2.8.0/haystack/backends/elasticsearch_backend.py000066400000000000000000001121321325051407000250010ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import re import warnings from datetime import datetime, timedelta from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.utils import six import haystack from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query from haystack.constants import DEFAULT_OPERATOR, DJANGO_CT, DJANGO_ID, FUZZY_MAX_EXPANSIONS, FUZZY_MIN_SIM, ID from haystack.exceptions import MissingDependency, MoreLikeThisError, SkipDocument from haystack.inputs import Clean, Exact, PythonData, Raw from haystack.models import SearchResult from haystack.utils import log as logging from haystack.utils import get_identifier, get_model_ct from haystack.utils.app_loading import haystack_get_model try: import 
elasticsearch try: # let's try this, for elasticsearch > 1.7.0 from elasticsearch.helpers import bulk except ImportError: # let's try this, for elasticsearch <= 1.7.0 from elasticsearch.helpers import bulk_index as bulk from elasticsearch.exceptions import NotFoundError except ImportError: raise MissingDependency("The 'elasticsearch' backend requires the installation of 'elasticsearch'. Please refer to the documentation.") DATETIME_REGEX = re.compile( r'^(?P\d{4})-(?P\d{2})-(?P\d{2})T' r'(?P\d{2}):(?P\d{2}):(?P\d{2})(\.\d+)?$') class ElasticsearchSearchBackend(BaseSearchBackend): # Word reserved by Elasticsearch for special use. RESERVED_WORDS = ( 'AND', 'NOT', 'OR', 'TO', ) # Characters reserved by Elasticsearch for special use. # The '\\' must come first, so as not to overwrite the other slash replacements. RESERVED_CHARACTERS = ( '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '/', ) # Settings to add an n-gram & edge n-gram analyzer. DEFAULT_SETTINGS = { 'settings': { "analysis": { "analyzer": { "ngram_analyzer": { "type": "custom", "tokenizer": "standard", "filter": ["haystack_ngram", "lowercase"] }, "edgengram_analyzer": { "type": "custom", "tokenizer": "standard", "filter": ["haystack_edgengram", "lowercase"] } }, "tokenizer": { "haystack_ngram_tokenizer": { "type": "nGram", "min_gram": 3, "max_gram": 15, }, "haystack_edgengram_tokenizer": { "type": "edgeNGram", "min_gram": 2, "max_gram": 15, "side": "front" } }, "filter": { "haystack_ngram": { "type": "nGram", "min_gram": 3, "max_gram": 15 }, "haystack_edgengram": { "type": "edgeNGram", "min_gram": 2, "max_gram": 15 } } } } } def __init__(self, connection_alias, **connection_options): super(ElasticsearchSearchBackend, self).__init__(connection_alias, **connection_options) if not 'URL' in connection_options: raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." 
% connection_alias) if not 'INDEX_NAME' in connection_options: raise ImproperlyConfigured("You must specify a 'INDEX_NAME' in your settings for connection '%s'." % connection_alias) self.conn = elasticsearch.Elasticsearch(connection_options['URL'], timeout=self.timeout, **connection_options.get('KWARGS', {})) self.index_name = connection_options['INDEX_NAME'] self.log = logging.getLogger('haystack') self.setup_complete = False self.existing_mapping = {} def setup(self): """ Defers loading until needed. """ # Get the existing mapping & cache it. We'll compare it # during the ``update`` & if it doesn't match, we'll put the new # mapping. try: self.existing_mapping = self.conn.indices.get_mapping(index=self.index_name) except NotFoundError: pass except Exception: if not self.silently_fail: raise unified_index = haystack.connections[self.connection_alias].get_unified_index() self.content_field_name, field_mapping = self.build_schema(unified_index.all_searchfields()) current_mapping = { 'modelresult': { 'properties': field_mapping, } } if current_mapping != self.existing_mapping: try: # Make sure the index is there first. self.conn.indices.create(index=self.index_name, body=self.DEFAULT_SETTINGS, ignore=400) self.conn.indices.put_mapping(index=self.index_name, doc_type='modelresult', body=current_mapping) self.existing_mapping = current_mapping except Exception: if not self.silently_fail: raise self.setup_complete = True def update(self, index, iterable, commit=True): if not self.setup_complete: try: self.setup() except elasticsearch.TransportError as e: if not self.silently_fail: raise self.log.error("Failed to add documents to Elasticsearch: %s", e, exc_info=True) return prepped_docs = [] for obj in iterable: try: prepped_data = index.full_prepare(obj) final_data = {} # Convert the data to make sure it's happy. 
for key, value in prepped_data.items(): final_data[key] = self._from_python(value) final_data['_id'] = final_data[ID] prepped_docs.append(final_data) except SkipDocument: self.log.debug(u"Indexing for object `%s` skipped", obj) except elasticsearch.TransportError as e: if not self.silently_fail: raise # We'll log the object identifier but won't include the actual object # to avoid the possibility of that generating encoding errors while # processing the log message: self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={"data": {"index": index, "object": get_identifier(obj)}}) bulk(self.conn, prepped_docs, index=self.index_name, doc_type='modelresult') if commit: self.conn.indices.refresh(index=self.index_name) def remove(self, obj_or_string, commit=True): doc_id = get_identifier(obj_or_string) if not self.setup_complete: try: self.setup() except elasticsearch.TransportError as e: if not self.silently_fail: raise self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, exc_info=True) return try: self.conn.delete(index=self.index_name, doc_type='modelresult', id=doc_id, ignore=404) if commit: self.conn.indices.refresh(index=self.index_name) except elasticsearch.TransportError as e: if not self.silently_fail: raise self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e, exc_info=True) def clear(self, models=None, commit=True): # We actually don't want to do this here, as mappings could be # very different. # if not self.setup_complete: # self.setup() if models is not None: assert isinstance(models, (list, tuple)) try: if models is None: self.conn.indices.delete(index=self.index_name, ignore=404) self.setup_complete = False self.existing_mapping = {} else: models_to_delete = [] for model in models: models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model))) # Delete by query in Elasticsearch asssumes you're dealing with # a ``query`` root object. 
:/ query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}} self.conn.delete_by_query(index=self.index_name, doc_type='modelresult', body=query) except elasticsearch.TransportError as e: if not self.silently_fail: raise if models is not None: self.log.error("Failed to clear Elasticsearch index of models '%s': %s", ','.join(models_to_delete), e, exc_info=True) else: self.log.error("Failed to clear Elasticsearch index: %s", e, exc_info=True) def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None, within=None, dwithin=None, distance_point=None, models=None, limit_to_registered_models=None, result_class=None, **extra_kwargs): index = haystack.connections[self.connection_alias].get_unified_index() content_field = index.document_field if query_string == '*:*': kwargs = { 'query': { "match_all": {} }, } else: kwargs = { 'query': { 'query_string': { 'default_field': content_field, 'default_operator': DEFAULT_OPERATOR, 'query': query_string, 'analyze_wildcard': True, 'auto_generate_phrase_queries': True, 'fuzzy_min_sim': FUZZY_MIN_SIM, 'fuzzy_max_expansions': FUZZY_MAX_EXPANSIONS, }, }, } # so far, no filters filters = [] if fields: if isinstance(fields, (list, set)): fields = " ".join(fields) kwargs['fields'] = fields if sort_by is not None: order_list = [] for field, direction in sort_by: if field == 'distance' and distance_point: # Do the geo-enabled sort. lng, lat = distance_point['point'].coords sort_kwargs = { "_geo_distance": { distance_point['field']: [lng, lat], "order": direction, "unit": "km" } } else: if field == 'distance': warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.") # Regular sorting. 
sort_kwargs = {field: {'order': direction}} order_list.append(sort_kwargs) kwargs['sort'] = order_list # From/size offsets don't seem to work right in Elasticsearch's DSL. :/ # if start_offset is not None: # kwargs['from'] = start_offset # if end_offset is not None: # kwargs['size'] = end_offset - start_offset if highlight: # `highlight` can either be True or a dictionary containing custom parameters # which will be passed to the backend and may override our default settings: kwargs['highlight'] = { 'fields': { content_field: {'store': 'yes'}, } } if isinstance(highlight, dict): kwargs['highlight'].update(highlight) if self.include_spelling: kwargs['suggest'] = { 'suggest': { 'text': spelling_query or query_string, 'term': { # Using content_field here will result in suggestions of stemmed words. 'field': '_all', }, }, } if narrow_queries is None: narrow_queries = set() if facets is not None: kwargs.setdefault('facets', {}) for facet_fieldname, extra_options in facets.items(): facet_options = { 'terms': { 'field': facet_fieldname, 'size': 100, }, } # Special cases for options applied at the facet level (not the terms level). if extra_options.pop('global_scope', False): # Renamed "global_scope" since "global" is a python keyword. facet_options['global'] = True if 'facet_filter' in extra_options: facet_options['facet_filter'] = extra_options.pop('facet_filter') facet_options['terms'].update(extra_options) kwargs['facets'][facet_fieldname] = facet_options if date_facets is not None: kwargs.setdefault('facets', {}) for facet_fieldname, value in date_facets.items(): # Need to detect on gap_by & only add amount if it's more than one. interval = value.get('gap_by').lower() # Need to detect on amount (can't be applied on months or years). if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'): # Just the first character is valid for use. 
interval = "%s%s" % (value['gap_amount'], interval[:1]) kwargs['facets'][facet_fieldname] = { 'date_histogram': { 'field': facet_fieldname, 'interval': interval, }, 'facet_filter': { "range": { facet_fieldname: { 'from': self._from_python(value.get('start_date')), 'to': self._from_python(value.get('end_date')), } } } } if query_facets is not None: kwargs.setdefault('facets', {}) for facet_fieldname, value in query_facets: kwargs['facets'][facet_fieldname] = { 'query': { 'query_string': { 'query': value, } }, } if limit_to_registered_models is None: limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) if models and len(models): model_choices = sorted(get_model_ct(model) for model in models) elif limit_to_registered_models: # Using narrow queries, limit the results to only models handled # with the current routers. model_choices = self.build_models_list() else: model_choices = [] if len(model_choices) > 0: filters.append({"terms": {DJANGO_CT: model_choices}}) for q in narrow_queries: filters.append({ 'fquery': { 'query': { 'query_string': { 'query': q }, }, '_cache': True, } }) if within is not None: from haystack.utils.geo import generate_bounding_box ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2']) within_filter = { "geo_bounding_box": { within['field']: { "top_left": { "lat": north, "lon": west }, "bottom_right": { "lat": south, "lon": east } } }, } filters.append(within_filter) if dwithin is not None: lng, lat = dwithin['point'].coords # NB: the 1.0.0 release of elasticsearch introduce an # incompatible change on the distance filter formating if elasticsearch.VERSION >= (1, 0, 0): distance = "%(dist).6f%(unit)s" % { 'dist': dwithin['distance'].km, 'unit': "km" } else: distance = dwithin['distance'].km dwithin_filter = { "geo_distance": { "distance": distance, dwithin['field']: { "lat": lat, "lon": lng } } } filters.append(dwithin_filter) # if we want to filter, change the query 
type to filteres if filters: kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}} if len(filters) == 1: kwargs['query']['filtered']["filter"] = filters[0] else: kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}} if extra_kwargs: kwargs.update(extra_kwargs) return kwargs @log_query def search(self, query_string, **kwargs): if len(query_string) == 0: return { 'results': [], 'hits': 0, } if not self.setup_complete: self.setup() search_kwargs = self.build_search_kwargs(query_string, **kwargs) search_kwargs['from'] = kwargs.get('start_offset', 0) order_fields = set() for order in search_kwargs.get('sort', []): for key in order.keys(): order_fields.add(key) geo_sort = '_geo_distance' in order_fields end_offset = kwargs.get('end_offset') start_offset = kwargs.get('start_offset', 0) if end_offset is not None and end_offset > start_offset: search_kwargs['size'] = end_offset - start_offset try: raw_results = self.conn.search(body=search_kwargs, index=self.index_name, doc_type='modelresult', _source=True) except elasticsearch.TransportError as e: if not self.silently_fail: raise self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e, exc_info=True) raw_results = {} return self._process_results(raw_results, highlight=kwargs.get('highlight'), result_class=kwargs.get('result_class', SearchResult), distance_point=kwargs.get('distance_point'), geo_sort=geo_sort) def more_like_this(self, model_instance, additional_query_string=None, start_offset=0, end_offset=None, models=None, limit_to_registered_models=None, result_class=None, **kwargs): from haystack import connections if not self.setup_complete: self.setup() # Deferred models will have a different class ("RealClass_Deferred_fieldname") # which won't be in our registry: model_klass = model_instance._meta.concrete_model index = connections[self.connection_alias].get_unified_index().get_index(model_klass) field_name = index.get_content_field() params = {} if start_offset is not 
None: params['search_from'] = start_offset if end_offset is not None: params['search_size'] = end_offset - start_offset doc_id = get_identifier(model_instance) try: raw_results = self.conn.mlt(index=self.index_name, doc_type='modelresult', id=doc_id, mlt_fields=[field_name], **params) except elasticsearch.TransportError as e: if not self.silently_fail: raise self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s", doc_id, e, exc_info=True) raw_results = {} return self._process_results(raw_results, result_class=result_class) def _process_results(self, raw_results, highlight=False, result_class=None, distance_point=None, geo_sort=False): from haystack import connections results = [] hits = raw_results.get('hits', {}).get('total', 0) facets = {} spelling_suggestion = None if result_class is None: result_class = SearchResult if self.include_spelling and 'suggest' in raw_results: raw_suggest = raw_results['suggest'].get('suggest') if raw_suggest: spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest]) if 'facets' in raw_results: facets = { 'fields': {}, 'dates': {}, 'queries': {}, } # ES can return negative timestamps for pre-1970 data. Handle it. def from_timestamp(tm): if tm >= 0: return datetime.utcfromtimestamp(tm) else: return datetime(1970, 1, 1) + timedelta(seconds=tm) for facet_fieldname, facet_info in raw_results['facets'].items(): if facet_info.get('_type', 'terms') == 'terms': facets['fields'][facet_fieldname] = [(individual['term'], individual['count']) for individual in facet_info['terms']] elif facet_info.get('_type', 'terms') == 'date_histogram': # Elasticsearch provides UTC timestamps with an extra three # decimals of precision, which datetime barfs on. 
facets['dates'][facet_fieldname] = [(from_timestamp(individual['time'] / 1000), individual['count']) for individual in facet_info['entries']] elif facet_info.get('_type', 'terms') == 'query': facets['queries'][facet_fieldname] = facet_info['count'] unified_index = connections[self.connection_alias].get_unified_index() indexed_models = unified_index.get_indexed_models() content_field = unified_index.document_field for raw_result in raw_results.get('hits', {}).get('hits', []): source = raw_result['_source'] app_label, model_name = source[DJANGO_CT].split('.') additional_fields = {} model = haystack_get_model(app_label, model_name) if model and model in indexed_models: index = source and unified_index.get_index(model) for key, value in source.items(): string_key = str(key) if string_key in index.fields and hasattr(index.fields[string_key], 'convert'): additional_fields[string_key] = index.fields[string_key].convert(value) else: additional_fields[string_key] = self._to_python(value) del(additional_fields[DJANGO_CT]) del(additional_fields[DJANGO_ID]) if 'highlight' in raw_result: additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '') if distance_point: additional_fields['_point_of_origin'] = distance_point if geo_sort and raw_result.get('sort'): from haystack.utils.geo import Distance additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0])) else: additional_fields['_distance'] = None result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields) results.append(result) else: hits -= 1 return { 'results': results, 'hits': hits, 'facets': facets, 'spelling_suggestion': spelling_suggestion, } def build_schema(self, fields): content_field_name = '' mapping = { DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False}, } for field_name, field_class in fields.items(): field_mapping = 
FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy() if field_class.boost != 1.0: field_mapping['boost'] = field_class.boost if field_class.document is True: content_field_name = field_class.index_fieldname # Do this last to override `text` fields. if field_mapping['type'] == 'string': if field_class.indexed is False or hasattr(field_class, 'facet_for'): field_mapping['index'] = 'not_analyzed' del field_mapping['analyzer'] mapping[field_class.index_fieldname] = field_mapping return (content_field_name, mapping) def _iso_datetime(self, value): """ If value appears to be something datetime-like, return it in ISO format. Otherwise, return None. """ if hasattr(value, 'strftime'): if hasattr(value, 'hour'): return value.isoformat() else: return '%sT00:00:00' % value.isoformat() def _from_python(self, value): """Convert more Python data types to ES-understandable JSON.""" iso = self._iso_datetime(value) if iso: return iso elif isinstance(value, six.binary_type): # TODO: Be stricter. return six.text_type(value, errors='replace') elif isinstance(value, set): return list(value) return value def _to_python(self, value): """Convert values from ElasticSearch to native Python values.""" if isinstance(value, (int, float, complex, list, tuple, bool)): return value if isinstance(value, six.string_types): possible_datetime = DATETIME_REGEX.search(value) if possible_datetime: date_values = possible_datetime.groupdict() for dk, dv in date_values.items(): date_values[dk] = int(dv) return datetime(date_values['year'], date_values['month'], date_values['day'], date_values['hour'], date_values['minute'], date_values['second']) try: # This is slightly gross but it's hard to tell otherwise what the # string's original type might have been. Be careful who you trust. converted_value = eval(value) # Try to handle most built-in types. 
if isinstance( converted_value, (int, list, tuple, set, dict, float, complex)): return converted_value except Exception: # If it fails (SyntaxError or its ilk) or we don't trust it, # continue on. pass return value # DRL_FIXME: Perhaps move to something where, if none of these # match, call a custom method on the form that returns, per-backend, # the right type of storage? DEFAULT_FIELD_MAPPING = {'type': 'string', 'analyzer': 'snowball'} FIELD_MAPPINGS = { 'edge_ngram': {'type': 'string', 'analyzer': 'edgengram_analyzer'}, 'ngram': {'type': 'string', 'analyzer': 'ngram_analyzer'}, 'date': {'type': 'date'}, 'datetime': {'type': 'date'}, 'location': {'type': 'geo_point'}, 'boolean': {'type': 'boolean'}, 'float': {'type': 'float'}, 'long': {'type': 'long'}, 'integer': {'type': 'long'}, } # Sucks that this is almost an exact copy of what's in the Solr backend, # but we can't import due to dependencies. class ElasticsearchSearchQuery(BaseSearchQuery): def matching_all_fragment(self): return '*:*' def build_query_fragment(self, field, filter_type, value): from haystack import connections query_frag = '' if not hasattr(value, 'input_type_name'): # Handle when we've got a ``ValuesListQuerySet``... if hasattr(value, 'values_list'): value = list(value) if isinstance(value, six.string_types): # It's not an ``InputType``. Assume ``Clean``. value = Clean(value) else: value = PythonData(value) # Prepare the query using the InputType. prepared_value = value.prepare(self) if not isinstance(prepared_value, (set, list, tuple)): # Then convert whatever we get back to what pysolr wants if needed. prepared_value = self.backend._from_python(prepared_value) # 'content' is a special reserved word, much like 'pk' in # Django's ORM layer. It indicates 'no special field'. 
if field == 'content': index_fieldname = '' else: index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field) filter_types = { 'content': u'%s', 'contains': u'*%s*', 'endswith': u'*%s', 'startswith': u'%s*', 'exact': u'%s', 'gt': u'{%s TO *}', 'gte': u'[%s TO *]', 'lt': u'{* TO %s}', 'lte': u'[* TO %s]', 'fuzzy': u'%s~', } if value.post_process is False: query_frag = prepared_value else: if filter_type in ['content', 'contains', 'startswith', 'endswith', 'fuzzy']: if value.input_type_name == 'exact': query_frag = prepared_value else: # Iterate over terms & incorportate the converted form of each into the query. terms = [] if isinstance(prepared_value, six.string_types): for possible_value in prepared_value.split(' '): terms.append(filter_types[filter_type] % self.backend._from_python(possible_value)) else: terms.append(filter_types[filter_type] % self.backend._from_python(prepared_value)) if len(terms) == 1: query_frag = terms[0] else: query_frag = u"(%s)" % " AND ".join(terms) elif filter_type == 'in': in_options = [] if not prepared_value: query_frag = u'(!*:*)' else: for possible_value in prepared_value: in_options.append(u'"%s"' % self.backend._from_python(possible_value)) query_frag = u"(%s)" % " OR ".join(in_options) elif filter_type == 'range': start = self.backend._from_python(prepared_value[0]) end = self.backend._from_python(prepared_value[1]) query_frag = u'["%s" TO "%s"]' % (start, end) elif filter_type == 'exact': if value.input_type_name == 'exact': query_frag = prepared_value else: prepared_value = Exact(prepared_value).prepare(self) query_frag = filter_types[filter_type] % prepared_value else: if value.input_type_name != 'exact': prepared_value = Exact(prepared_value).prepare(self) query_frag = filter_types[filter_type] % prepared_value if len(query_frag) and not isinstance(value, Raw): if not query_frag.startswith('(') and not query_frag.endswith(')'): query_frag = "(%s)" % query_frag return u"%s%s" % 
(index_fieldname, query_frag) def build_alt_parser_query(self, parser_name, query_string='', **kwargs): if query_string: kwargs['v'] = query_string kwarg_bits = [] for key in sorted(kwargs.keys()): if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]: kwarg_bits.append(u"%s='%s'" % (key, kwargs[key])) else: kwarg_bits.append(u"%s=%s" % (key, kwargs[key])) return u"{!%s %s}" % (parser_name, ' '.join(kwarg_bits)) def build_params(self, spelling_query=None, **kwargs): search_kwargs = { 'start_offset': self.start_offset, 'result_class': self.result_class } order_by_list = None if self.order_by: if order_by_list is None: order_by_list = [] for field in self.order_by: direction = 'asc' if field.startswith('-'): direction = 'desc' field = field[1:] order_by_list.append((field, direction)) search_kwargs['sort_by'] = order_by_list if self.date_facets: search_kwargs['date_facets'] = self.date_facets if self.distance_point: search_kwargs['distance_point'] = self.distance_point if self.dwithin: search_kwargs['dwithin'] = self.dwithin if self.end_offset is not None: search_kwargs['end_offset'] = self.end_offset if self.facets: search_kwargs['facets'] = self.facets if self.fields: search_kwargs['fields'] = self.fields if self.highlight: search_kwargs['highlight'] = self.highlight if self.models: search_kwargs['models'] = self.models if self.narrow_queries: search_kwargs['narrow_queries'] = self.narrow_queries if self.query_facets: search_kwargs['query_facets'] = self.query_facets if self.within: search_kwargs['within'] = self.within if spelling_query: search_kwargs['spelling_query'] = spelling_query elif self.spelling_query: search_kwargs['spelling_query'] = self.spelling_query return search_kwargs def run(self, spelling_query=None, **kwargs): """Builds and executes the query. 
Returns a list of search results.""" final_query = self.build_query() search_kwargs = self.build_params(spelling_query, **kwargs) if kwargs: search_kwargs.update(kwargs) results = self.backend.search(final_query, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) self._facet_counts = self.post_process_facets(results) self._spelling_suggestion = results.get('spelling_suggestion', None) def run_mlt(self, **kwargs): """Builds and executes the query. Returns a list of search results.""" if self._more_like_this is False or self._mlt_instance is None: raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") additional_query_string = self.build_query() search_kwargs = { 'start_offset': self.start_offset, 'result_class': self.result_class, 'models': self.models } if self.end_offset is not None: search_kwargs['end_offset'] = self.end_offset - self.start_offset results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) class ElasticsearchSearchEngine(BaseEngine): backend = ElasticsearchSearchBackend query = ElasticsearchSearchQuery django-haystack-2.8.0/haystack/backends/simple_backend.py000066400000000000000000000101641325051407000234620ustar00rootroot00000000000000# encoding: utf-8 """ A very basic, ORM-based backend for simple search during tests. 
""" from __future__ import absolute_import, division, print_function, unicode_literals from warnings import warn from django.conf import settings from django.db.models import Q from django.utils import six from haystack import connections from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query, SearchNode from haystack.inputs import PythonData from haystack.models import SearchResult from haystack.utils import get_model_ct_tuple if settings.DEBUG: import logging class NullHandler(logging.Handler): def emit(self, record): pass ch = logging.StreamHandler() ch.setLevel(logging.WARNING) ch.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) logger = logging.getLogger('haystack.simple_backend') logger.setLevel(logging.WARNING) logger.addHandler(NullHandler()) logger.addHandler(ch) else: logger = None class SimpleSearchBackend(BaseSearchBackend): def update(self, indexer, iterable, commit=True): warn('update is not implemented in this backend') def remove(self, obj, commit=True): warn('remove is not implemented in this backend') def clear(self, models=None, commit=True): warn('clear is not implemented in this backend') @log_query def search(self, query_string, **kwargs): hits = 0 results = [] result_class = SearchResult models = connections[self.connection_alias].get_unified_index().get_indexed_models() if kwargs.get('result_class'): result_class = kwargs['result_class'] if kwargs.get('models'): models = kwargs['models'] if query_string: for model in models: if query_string == '*': qs = model.objects.all() else: for term in query_string.split(): queries = [] for field in model._meta.fields: if hasattr(field, 'related'): continue if not field.get_internal_type() in ('TextField', 'CharField', 'SlugField'): continue queries.append(Q(**{'%s__icontains' % field.name: term})) qs = model.objects.filter(six.moves.reduce(lambda x, y: x | y, queries)) hits += len(qs) for match in qs: match.__dict__.pop('score', 
None) app_label, model_name = get_model_ct_tuple(match) result = result_class(app_label, model_name, match.pk, 0, **match.__dict__) # For efficiency. result._model = match.__class__ result._object = match results.append(result) return { 'results': results, 'hits': hits, } def prep_value(self, db_field, value): return value def more_like_this(self, model_instance, additional_query_string=None, start_offset=0, end_offset=None, limit_to_registered_models=None, result_class=None, **kwargs): return { 'results': [], 'hits': 0 } class SimpleSearchQuery(BaseSearchQuery): def build_query(self): if not self.query_filter: return '*' return self._build_sub_query(self.query_filter) def _build_sub_query(self, search_node): term_list = [] for child in search_node.children: if isinstance(child, SearchNode): term_list.append(self._build_sub_query(child)) else: value = child[1] if not hasattr(value, 'input_type_name'): value = PythonData(value) term_list.append(value.prepare(self)) return (' ').join(map(six.text_type, term_list)) class SimpleEngine(BaseEngine): backend = SimpleSearchBackend query = SimpleSearchQuery django-haystack-2.8.0/haystack/backends/solr_backend.py000066400000000000000000001011341325051407000231460ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import warnings from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.utils import six import haystack from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query from haystack.constants import DJANGO_CT, DJANGO_ID, ID from haystack.exceptions import MissingDependency, MoreLikeThisError, SkipDocument from haystack.inputs import Clean, Exact, PythonData, Raw from haystack.models import SearchResult from haystack.utils import log as logging from haystack.utils import get_identifier, get_model_ct from haystack.utils.app_loading import haystack_get_model 
try: from pysolr import Solr, SolrError except ImportError: raise MissingDependency("The 'solr' backend requires the installation of 'pysolr'. Please refer to the documentation.") class SolrSearchBackend(BaseSearchBackend): # Word reserved by Solr for special use. RESERVED_WORDS = ( 'AND', 'NOT', 'OR', 'TO', ) # Characters reserved by Solr for special use. # The '\\' must come first, so as not to overwrite the other slash replacements. RESERVED_CHARACTERS = ( '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '/', ) def __init__(self, connection_alias, **connection_options): super(SolrSearchBackend, self).__init__(connection_alias, **connection_options) if 'URL' not in connection_options: raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias) self.collate = connection_options.get('COLLATE_SPELLING', True) self.conn = Solr(connection_options['URL'], timeout=self.timeout, **connection_options.get('KWARGS', {})) self.log = logging.getLogger('haystack') def update(self, index, iterable, commit=True): docs = [] for obj in iterable: try: docs.append(index.full_prepare(obj)) except SkipDocument: self.log.debug(u"Indexing for object `%s` skipped", obj) except UnicodeDecodeError: if not self.silently_fail: raise # We'll log the object identifier but won't include the actual object # to avoid the possibility of that generating encoding errors while # processing the log message: self.log.error(u"UnicodeDecodeError while preparing object for update", exc_info=True, extra={"data": {"index": index, "object": get_identifier(obj)}}) if len(docs) > 0: try: self.conn.add(docs, commit=commit, boost=index.get_field_weights()) except (IOError, SolrError) as e: if not self.silently_fail: raise self.log.error("Failed to add documents to Solr: %s", e, exc_info=True) def remove(self, obj_or_string, commit=True): solr_id = get_identifier(obj_or_string) try: kwargs = { 'commit': commit, 'id': 
solr_id } self.conn.delete(**kwargs) except (IOError, SolrError) as e: if not self.silently_fail: raise self.log.error("Failed to remove document '%s' from Solr: %s", solr_id, e, exc_info=True) def clear(self, models=None, commit=True): if models is not None: assert isinstance(models, (list, tuple)) try: if models is None: # *:* matches all docs in Solr self.conn.delete(q='*:*', commit=commit) else: models_to_delete = [] for model in models: models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model))) self.conn.delete(q=" OR ".join(models_to_delete), commit=commit) if commit: # Run an optimize post-clear. http://wiki.apache.org/solr/FAQ#head-9aafb5d8dff5308e8ea4fcf4b71f19f029c4bb99 self.conn.optimize() except (IOError, SolrError) as e: if not self.silently_fail: raise if models is not None: self.log.error("Failed to clear Solr index of models '%s': %s", ','.join(models_to_delete), e, exc_info=True) else: self.log.error("Failed to clear Solr index: %s", e, exc_info=True) @log_query def search(self, query_string, **kwargs): if len(query_string) == 0: return { 'results': [], 'hits': 0, } search_kwargs = self.build_search_kwargs(query_string, **kwargs) try: raw_results = self.conn.search(query_string, **search_kwargs) except (IOError, SolrError) as e: if not self.silently_fail: raise self.log.error("Failed to query Solr using '%s': %s", query_string, e, exc_info=True) raw_results = EmptyResults() return self._process_results(raw_results, highlight=kwargs.get('highlight'), result_class=kwargs.get('result_class', SearchResult), distance_point=kwargs.get('distance_point')) def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None, within=None, dwithin=None, distance_point=None, models=None, limit_to_registered_models=None, result_class=None, stats=None, collate=None, **extra_kwargs): index = 
haystack.connections[self.connection_alias].get_unified_index() kwargs = { 'fl': '* score', 'df': index.document_field, } if fields: if isinstance(fields, (list, set)): fields = " ".join(fields) kwargs['fl'] = fields if sort_by is not None: if sort_by in ['distance asc', 'distance desc'] and distance_point: # Do the geo-enabled sort. lng, lat = distance_point['point'].coords kwargs['sfield'] = distance_point['field'] kwargs['pt'] = '%s,%s' % (lat, lng) if sort_by == 'distance asc': kwargs['sort'] = 'geodist() asc' else: kwargs['sort'] = 'geodist() desc' else: if sort_by.startswith('distance '): warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.") # Regular sorting. kwargs['sort'] = sort_by if start_offset is not None: kwargs['start'] = start_offset if end_offset is not None: kwargs['rows'] = end_offset - start_offset if highlight: # `highlight` can either be True or a dictionary containing custom parameters # which will be passed to the backend and may override our default settings: kwargs['hl'] = 'true' kwargs['hl.fragsize'] = '200' if isinstance(highlight, dict): # autoprefix highlighter options with 'hl.', all of them start with it anyway # this makes option dicts shorter: {'maxAnalyzedChars': 42} # and lets some of options be used as keyword arguments: `.highlight(preserveMulti=False)` kwargs.update({ key if key.startswith("hl.") else ('hl.' 
+ key): highlight[key] for key in highlight.keys() }) if collate is None: collate = self.collate if self.include_spelling is True: kwargs['spellcheck'] = 'true' kwargs['spellcheck.collate'] = str(collate).lower() kwargs['spellcheck.count'] = 1 if spelling_query: kwargs['spellcheck.q'] = spelling_query if facets is not None: kwargs['facet'] = 'on' kwargs['facet.field'] = facets.keys() for facet_field, options in facets.items(): for key, value in options.items(): kwargs['f.%s.facet.%s' % (facet_field, key)] = self.conn._from_python(value) if date_facets is not None: kwargs['facet'] = 'on' kwargs['facet.date'] = date_facets.keys() kwargs['facet.date.other'] = 'none' for key, value in date_facets.items(): kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(value.get('start_date')) kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(value.get('end_date')) gap_by_string = value.get('gap_by').upper() gap_string = "%d%s" % (value.get('gap_amount'), gap_by_string) if value.get('gap_amount') != 1: gap_string += "S" kwargs["f.%s.facet.date.gap" % key] = '+%s/%s' % (gap_string, gap_by_string) if query_facets is not None: kwargs['facet'] = 'on' kwargs['facet.query'] = ["%s:%s" % (field, value) for field, value in query_facets] if limit_to_registered_models is None: limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) if models and len(models): model_choices = sorted(get_model_ct(model) for model in models) elif limit_to_registered_models: # Using narrow queries, limit the results to only models handled # with the current routers. 
model_choices = self.build_models_list() else: model_choices = [] if len(model_choices) > 0: if narrow_queries is None: narrow_queries = set() narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices))) if narrow_queries is not None: kwargs['fq'] = list(narrow_queries) if stats: kwargs['stats'] = "true" for k in stats.keys(): kwargs['stats.field'] = k for facet in stats[k]: kwargs['f.%s.stats.facet' % k] = facet if within is not None: from haystack.utils.geo import generate_bounding_box kwargs.setdefault('fq', []) ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2']) # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT* # very clear on this. bbox = '%s:[%s,%s TO %s,%s]' % (within['field'], min_lat, min_lng, max_lat, max_lng) kwargs['fq'].append(bbox) if dwithin is not None: kwargs.setdefault('fq', []) lng, lat = dwithin['point'].coords geofilt = '{!geofilt pt=%s,%s sfield=%s d=%s}' % (lat, lng, dwithin['field'], dwithin['distance'].km) kwargs['fq'].append(geofilt) # Check to see if the backend should try to include distances # (Solr 4.X+) in the results. if self.distance_available and distance_point: # In early testing, you can't just hand Solr 4.X a proper bounding box # & request distances. To enable native distance would take calculating # a center point & a radius off the user-provided box, which kinda # sucks. We'll avoid it for now, since Solr 4.x's release will be some # time yet. 
# kwargs['fl'] += ' _dist_:geodist()' pass if extra_kwargs: kwargs.update(extra_kwargs) return kwargs def more_like_this(self, model_instance, additional_query_string=None, start_offset=0, end_offset=None, models=None, limit_to_registered_models=None, result_class=None, **kwargs): from haystack import connections # Deferred models will have a different class ("RealClass_Deferred_fieldname") # which won't be in our registry: model_klass = model_instance._meta.concrete_model index = connections[self.connection_alias].get_unified_index().get_index(model_klass) field_name = index.get_content_field() params = { 'fl': '*,score', } if start_offset is not None: params['start'] = start_offset if end_offset is not None: params['rows'] = end_offset narrow_queries = set() if limit_to_registered_models is None: limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) if models and len(models): model_choices = sorted(get_model_ct(model) for model in models) elif limit_to_registered_models: # Using narrow queries, limit the results to only models handled # with the current routers. 
model_choices = self.build_models_list() else: model_choices = [] if len(model_choices) > 0: if narrow_queries is None: narrow_queries = set() narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices))) if additional_query_string: narrow_queries.add(additional_query_string) if narrow_queries: params['fq'] = list(narrow_queries) query = "%s:%s" % (ID, get_identifier(model_instance)) try: raw_results = self.conn.more_like_this(query, field_name, **params) except (IOError, SolrError) as e: if not self.silently_fail: raise self.log.error("Failed to fetch More Like This from Solr for document '%s': %s", query, e, exc_info=True) raw_results = EmptyResults() return self._process_results(raw_results, result_class=result_class) def _process_results(self, raw_results, highlight=False, result_class=None, distance_point=None): from haystack import connections results = [] hits = raw_results.hits facets = {} stats = {} spelling_suggestion = spelling_suggestions = None if result_class is None: result_class = SearchResult if hasattr(raw_results, 'stats'): stats = raw_results.stats.get('stats_fields', {}) if hasattr(raw_results, 'facets'): facets = { 'fields': raw_results.facets.get('facet_fields', {}), 'dates': raw_results.facets.get('facet_dates', {}), 'queries': raw_results.facets.get('facet_queries', {}), } for key in ['fields']: for facet_field in facets[key]: # Convert to a two-tuple, as Solr's json format returns a list of # pairs. 
facets[key][facet_field] = list(zip(facets[key][facet_field][::2], facets[key][facet_field][1::2])) if self.include_spelling and hasattr(raw_results, 'spellcheck'): try: spelling_suggestions = self.extract_spelling_suggestions(raw_results) except Exception as exc: self.log.error('Error extracting spelling suggestions: %s', exc, exc_info=True, extra={'data': {'spellcheck': raw_results.spellcheck}}) if not self.silently_fail: raise spelling_suggestions = None if spelling_suggestions: # Maintain compatibility with older versions of Haystack which returned a single suggestion: spelling_suggestion = spelling_suggestions[-1] assert isinstance(spelling_suggestion, six.string_types) else: spelling_suggestion = None unified_index = connections[self.connection_alias].get_unified_index() indexed_models = unified_index.get_indexed_models() for raw_result in raw_results.docs: app_label, model_name = raw_result[DJANGO_CT].split('.') additional_fields = {} model = haystack_get_model(app_label, model_name) if model and model in indexed_models: index = unified_index.get_index(model) index_field_map = index.field_map for key, value in raw_result.items(): string_key = str(key) # re-map key if alternate name used if string_key in index_field_map: string_key = index_field_map[key] if string_key in index.fields and hasattr(index.fields[string_key], 'convert'): additional_fields[string_key] = index.fields[string_key].convert(value) else: additional_fields[string_key] = self.conn._to_python(value) del(additional_fields[DJANGO_CT]) del(additional_fields[DJANGO_ID]) del(additional_fields['score']) if raw_result[ID] in getattr(raw_results, 'highlighting', {}): additional_fields['highlighted'] = raw_results.highlighting[raw_result[ID]] if distance_point: additional_fields['_point_of_origin'] = distance_point if raw_result.get('__dist__'): from haystack.utils.geo import Distance additional_fields['_distance'] = Distance(km=float(raw_result['__dist__'])) else: additional_fields['_distance'] = 
None result = result_class(app_label, model_name, raw_result[DJANGO_ID], raw_result['score'], **additional_fields) results.append(result) else: hits -= 1 return { 'results': results, 'hits': hits, 'stats': stats, 'facets': facets, 'spelling_suggestion': spelling_suggestion, 'spelling_suggestions': spelling_suggestions, } def extract_spelling_suggestions(self, raw_results): # There are many different formats for Legacy, 6.4, and 6.5 e.g. # https://issues.apache.org/jira/browse/SOLR-3029 and depending on the # version and configuration the response format may be a dict of dicts, # a list of dicts, or a list of strings. collations = raw_results.spellcheck.get('collations', None) suggestions = raw_results.spellcheck.get('suggestions', None) # We'll collect multiple suggestions here. For backwards # compatibility with older versions of Haystack we'll still return # only a single suggestion but in the future we can expose all of # them. spelling_suggestions = [] if collations: if isinstance(collations, dict): # Solr 6.5 collation_values = collations['collation'] if isinstance(collation_values, six.string_types): collation_values = [collation_values] elif isinstance(collation_values, dict): # spellcheck.collateExtendedResults changes the format to a dictionary: collation_values = [collation_values['collationQuery']] elif isinstance(collations[1], dict): # Solr 6.4 collation_values = collations else: # Older versions of Solr collation_values = collations[-1:] for i in collation_values: # Depending on the options the values are either simple strings or dictionaries: spelling_suggestions.append(i['collationQuery'] if isinstance(i, dict) else i) elif suggestions: if isinstance(suggestions, dict): for i in suggestions.values(): for j in i['suggestion']: if isinstance(j, dict): spelling_suggestions.append(j['word']) else: spelling_suggestions.append(j) elif isinstance(suggestions[0], six.string_types) and isinstance(suggestions[1], dict): # Solr 6.4 uses a list of paired (word, 
dictionary) pairs: for suggestion in suggestions: if isinstance(suggestion, dict): for i in suggestion['suggestion']: if isinstance(i, dict): spelling_suggestions.append(i['word']) else: spelling_suggestions.append(i) else: # Legacy Solr spelling_suggestions.append(suggestions[-1]) return spelling_suggestions def build_schema(self, fields): content_field_name = '' schema_fields = [] for field_name, field_class in fields.items(): field_data = { 'field_name': field_class.index_fieldname, 'type': 'text_en', 'indexed': 'true', 'stored': 'true', 'multi_valued': 'false', } if field_class.document is True: content_field_name = field_class.index_fieldname # DRL_FIXME: Perhaps move to something where, if none of these # checks succeed, call a custom method on the form that # returns, per-backend, the right type of storage? if field_class.field_type in ['date', 'datetime']: field_data['type'] = 'date' elif field_class.field_type == 'integer': field_data['type'] = 'long' elif field_class.field_type == 'float': field_data['type'] = 'float' elif field_class.field_type == 'boolean': field_data['type'] = 'boolean' elif field_class.field_type == 'ngram': field_data['type'] = 'ngram' elif field_class.field_type == 'edge_ngram': field_data['type'] = 'edge_ngram' elif field_class.field_type == 'location': field_data['type'] = 'location' if field_class.is_multivalued: field_data['multi_valued'] = 'true' if field_class.stored is False: field_data['stored'] = 'false' # Do this last to override `text` fields. if field_class.indexed is False: field_data['indexed'] = 'false' # If it's text and not being indexed, we probably don't want # to do the normal lowercase/tokenize/stemming/etc. dance. if field_data['type'] == 'text_en': field_data['type'] = 'string' # If it's a ``FacetField``, make sure we don't postprocess it. if hasattr(field_class, 'facet_for'): # If it's text, it ought to be a string. 
if field_data['type'] == 'text_en': field_data['type'] = 'string' schema_fields.append(field_data) return (content_field_name, schema_fields) def extract_file_contents(self, file_obj, **kwargs): """Extract text and metadata from a structured file (PDF, MS Word, etc.) Uses the Solr ExtractingRequestHandler, which is based on Apache Tika. See the Solr wiki for details: http://wiki.apache.org/solr/ExtractingRequestHandler Due to the way the ExtractingRequestHandler is implemented it completely replaces the normal Haystack indexing process with several unfortunate restrictions: only one file per request, the extracted data is added to the index with no ability to modify it, etc. To simplify the process and allow for more advanced use we'll run using the extract-only mode to return the extracted data without adding it to the index so we can then use it within Haystack's normal templating process. Returns None if metadata cannot be extracted; otherwise returns a dictionary containing at least two keys: :contents: Extracted full-text content, if applicable :metadata: key:value pairs of text strings """ try: return self.conn.extract(file_obj, **kwargs) except Exception as e: self.log.warning(u"Unable to extract file contents: %s", e, exc_info=True, extra={"data": {"file": file_obj}}) return None class SolrSearchQuery(BaseSearchQuery): def matching_all_fragment(self): return '*:*' def build_query_fragment(self, field, filter_type, value): from haystack import connections query_frag = '' if not hasattr(value, 'input_type_name'): # Handle when we've got a ``ValuesListQuerySet``... if hasattr(value, 'values_list'): value = list(value) if isinstance(value, six.string_types): # It's not an ``InputType``. Assume ``Clean``. value = Clean(value) else: value = PythonData(value) # Prepare the query using the InputType. prepared_value = value.prepare(self) if not isinstance(prepared_value, (set, list, tuple)): # Then convert whatever we get back to what pysolr wants if needed. 
prepared_value = self.backend.conn._from_python(prepared_value) # 'content' is a special reserved word, much like 'pk' in # Django's ORM layer. It indicates 'no special field'. if field == 'content': index_fieldname = '' else: index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field) filter_types = { 'content': u'%s', 'contains': u'*%s*', 'endswith': u'*%s', 'startswith': u'%s*', 'exact': u'%s', 'gt': u'{%s TO *}', 'gte': u'[%s TO *]', 'lt': u'{* TO %s}', 'lte': u'[* TO %s]', 'fuzzy': u'%s~', } if value.post_process is False: query_frag = prepared_value else: if filter_type in ['content', 'contains', 'startswith', 'endswith', 'fuzzy']: if value.input_type_name == 'exact': query_frag = prepared_value else: # Iterate over terms & incorportate the converted form of each into the query. terms = [] for possible_value in prepared_value.split(' '): terms.append(filter_types[filter_type] % self.backend.conn._from_python(possible_value)) if len(terms) == 1: query_frag = terms[0] else: query_frag = u"(%s)" % " AND ".join(terms) elif filter_type == 'in': in_options = [] if not prepared_value: query_frag = u'(!*:*)' else: for possible_value in prepared_value: in_options.append(u'"%s"' % self.backend.conn._from_python(possible_value)) query_frag = u"(%s)" % " OR ".join(in_options) elif filter_type == 'range': start = self.backend.conn._from_python(prepared_value[0]) end = self.backend.conn._from_python(prepared_value[1]) query_frag = u'["%s" TO "%s"]' % (start, end) elif filter_type == 'exact': if value.input_type_name == 'exact': query_frag = prepared_value else: prepared_value = Exact(prepared_value).prepare(self) query_frag = filter_types[filter_type] % prepared_value else: if value.input_type_name != 'exact': prepared_value = Exact(prepared_value).prepare(self) query_frag = filter_types[filter_type] % prepared_value if len(query_frag) and not isinstance(value, Raw): if not query_frag.startswith('(') and not query_frag.endswith(')'): 
query_frag = "(%s)" % query_frag return u"%s%s" % (index_fieldname, query_frag) def build_alt_parser_query(self, parser_name, query_string='', **kwargs): if query_string: query_string = Clean(query_string).prepare(self) kwarg_bits = [] for key in sorted(kwargs.keys()): if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]: kwarg_bits.append(u"%s='%s'" % (key, kwargs[key])) else: kwarg_bits.append(u"%s=%s" % (key, kwargs[key])) return u'_query_:"{!%s %s}%s"' % (parser_name, Clean(' '.join(kwarg_bits)), query_string) def build_params(self, spelling_query=None, **kwargs): search_kwargs = { 'start_offset': self.start_offset, 'result_class': self.result_class } order_by_list = None if self.order_by: if order_by_list is None: order_by_list = [] for order_by in self.order_by: if order_by.startswith('-'): order_by_list.append('%s desc' % order_by[1:]) else: order_by_list.append('%s asc' % order_by) search_kwargs['sort_by'] = ", ".join(order_by_list) if self.date_facets: search_kwargs['date_facets'] = self.date_facets if self.distance_point: search_kwargs['distance_point'] = self.distance_point if self.dwithin: search_kwargs['dwithin'] = self.dwithin if self.end_offset is not None: search_kwargs['end_offset'] = self.end_offset if self.facets: search_kwargs['facets'] = self.facets if self.fields: search_kwargs['fields'] = self.fields if self.highlight: search_kwargs['highlight'] = self.highlight if self.models: search_kwargs['models'] = self.models if self.narrow_queries: search_kwargs['narrow_queries'] = self.narrow_queries if self.query_facets: search_kwargs['query_facets'] = self.query_facets if self.within: search_kwargs['within'] = self.within if spelling_query: search_kwargs['spelling_query'] = spelling_query elif self.spelling_query: search_kwargs['spelling_query'] = self.spelling_query if self.stats: search_kwargs['stats'] = self.stats return search_kwargs def run(self, spelling_query=None, **kwargs): """Builds and executes the query. 
Returns a list of search results.""" final_query = self.build_query() search_kwargs = self.build_params(spelling_query, **kwargs) if kwargs: search_kwargs.update(kwargs) results = self.backend.search(final_query, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) self._facet_counts = self.post_process_facets(results) self._stats = results.get('stats', {}) self._spelling_suggestion = results.get('spelling_suggestion', None) def run_mlt(self, **kwargs): """Builds and executes the query. Returns a list of search results.""" if self._more_like_this is False or self._mlt_instance is None: raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.") additional_query_string = self.build_query() search_kwargs = { 'start_offset': self.start_offset, 'result_class': self.result_class, 'models': self.models } if self.end_offset is not None: search_kwargs['end_offset'] = self.end_offset - self.start_offset results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs) self._results = results.get('results', []) self._hit_count = results.get('hits', 0) class SolrEngine(BaseEngine): backend = SolrSearchBackend query = SolrSearchQuery django-haystack-2.8.0/haystack/backends/whoosh_backend.py000066400000000000000000001024311325051407000234770ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import json import os import re import shutil import threading import warnings from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.utils import six from django.utils.datetime_safe import datetime from django.utils.encoding import force_text from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query from haystack.constants import DJANGO_CT, DJANGO_ID, ID from haystack.exceptions import MissingDependency, 
SearchBackendError, SkipDocument from haystack.inputs import Clean, Exact, PythonData, Raw from haystack.models import SearchResult from haystack.utils import log as logging from haystack.utils import get_identifier, get_model_ct from haystack.utils.app_loading import haystack_get_model try: import whoosh except ImportError: raise MissingDependency("The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.") # Handle minimum requirement. if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0): raise MissingDependency("The 'whoosh' backend requires version 2.5.0 or greater.") # Bubble up the correct error. from whoosh import index from whoosh.analysis import StemmingAnalyzer from whoosh.fields import ID as WHOOSH_ID from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT from whoosh.filedb.filestore import FileStorage, RamStorage from whoosh.highlight import highlight as whoosh_highlight from whoosh.highlight import ContextFragmenter, HtmlFormatter from whoosh.qparser import QueryParser from whoosh.searching import ResultsPage from whoosh.writing import AsyncWriter DATETIME_REGEX = re.compile('^(?P\d{4})-(?P\d{2})-(?P\d{2})T(?P\d{2}):(?P\d{2}):(?P\d{2})(\.\d{3,6}Z?)?$') LOCALS = threading.local() LOCALS.RAM_STORE = None class WhooshHtmlFormatter(HtmlFormatter): """ This is a HtmlFormatter simpler than the whoosh.HtmlFormatter. We use it to have consistent results across backends. Specifically, Solr, Xapian and Elasticsearch are using this formatting. """ template = '<%(tag)s>%(t)s' class WhooshSearchBackend(BaseSearchBackend): # Word reserved by Whoosh for special use. RESERVED_WORDS = ( 'AND', 'NOT', 'OR', 'TO', ) # Characters reserved by Whoosh for special use. # The '\\' must come first, so as not to overwrite the other slash replacements. 
RESERVED_CHARACTERS = ( '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*', '?', ':', '.', ) def __init__(self, connection_alias, **connection_options): super(WhooshSearchBackend, self).__init__(connection_alias, **connection_options) self.setup_complete = False self.use_file_storage = True self.post_limit = getattr(connection_options, 'POST_LIMIT', 128 * 1024 * 1024) self.path = connection_options.get('PATH') if connection_options.get('STORAGE', 'file') != 'file': self.use_file_storage = False if self.use_file_storage and not self.path: raise ImproperlyConfigured("You must specify a 'PATH' in your settings for connection '%s'." % connection_alias) self.log = logging.getLogger('haystack') def setup(self): """ Defers loading until needed. """ from haystack import connections new_index = False # Make sure the index is there. if self.use_file_storage and not os.path.exists(self.path): os.makedirs(self.path) new_index = True if self.use_file_storage and not os.access(self.path, os.W_OK): raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." 
% self.path) if self.use_file_storage: self.storage = FileStorage(self.path) else: global LOCALS if getattr(LOCALS, 'RAM_STORE', None) is None: LOCALS.RAM_STORE = RamStorage() self.storage = LOCALS.RAM_STORE self.content_field_name, self.schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields()) self.parser = QueryParser(self.content_field_name, schema=self.schema) if new_index is True: self.index = self.storage.create_index(self.schema) else: try: self.index = self.storage.open_index(schema=self.schema) except index.EmptyIndexError: self.index = self.storage.create_index(self.schema) self.setup_complete = True def build_schema(self, fields): schema_fields = { ID: WHOOSH_ID(stored=True, unique=True), DJANGO_CT: WHOOSH_ID(stored=True), DJANGO_ID: WHOOSH_ID(stored=True), } # Grab the number of keys that are hard-coded into Haystack. # We'll use this to (possibly) fail slightly more gracefully later. initial_key_count = len(schema_fields) content_field_name = '' for field_name, field_class in fields.items(): if field_class.is_multivalued: if field_class.indexed is False: schema_fields[field_class.index_fieldname] = IDLIST(stored=True, field_boost=field_class.boost) else: schema_fields[field_class.index_fieldname] = KEYWORD(stored=True, commas=True, scorable=True, field_boost=field_class.boost) elif field_class.field_type in ['date', 'datetime']: schema_fields[field_class.index_fieldname] = DATETIME(stored=field_class.stored, sortable=True) elif field_class.field_type == 'integer': schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=int, field_boost=field_class.boost) elif field_class.field_type == 'float': schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=float, field_boost=field_class.boost) elif field_class.field_type == 'boolean': # Field boost isn't supported on BOOLEAN as of 1.8.2. 
schema_fields[field_class.index_fieldname] = BOOLEAN(stored=field_class.stored) elif field_class.field_type == 'ngram': schema_fields[field_class.index_fieldname] = NGRAM(minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost) elif field_class.field_type == 'edge_ngram': schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=field_class.stored, field_boost=field_class.boost) else: schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True) if field_class.document is True: content_field_name = field_class.index_fieldname schema_fields[field_class.index_fieldname].spelling = True # Fail more gracefully than relying on the backend to die if no fields # are found. if len(schema_fields) <= initial_key_count: raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.") return (content_field_name, Schema(**schema_fields)) def update(self, index, iterable, commit=True): if not self.setup_complete: self.setup() self.index = self.index.refresh() writer = AsyncWriter(self.index) for obj in iterable: try: doc = index.full_prepare(obj) except SkipDocument: self.log.debug(u"Indexing for object `%s` skipped", obj) else: # Really make sure it's unicode, because Whoosh won't have it any # other way. for key in doc: doc[key] = self._from_python(doc[key]) # Document boosts aren't supported in Whoosh 2.5.0+. 
if 'boost' in doc: del doc['boost'] try: writer.update_document(**doc) except Exception as e: if not self.silently_fail: raise # We'll log the object identifier but won't include the actual object # to avoid the possibility of that generating encoding errors while # processing the log message: self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={"data": {"index": index, "object": get_identifier(obj)}}) if len(iterable) > 0: # For now, commit no matter what, as we run into locking issues otherwise. writer.commit() def remove(self, obj_or_string, commit=True): if not self.setup_complete: self.setup() self.index = self.index.refresh() whoosh_id = get_identifier(obj_or_string) try: self.index.delete_by_query(q=self.parser.parse(u'%s:"%s"' % (ID, whoosh_id))) except Exception as e: if not self.silently_fail: raise self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e, exc_info=True) def clear(self, models=None, commit=True): if not self.setup_complete: self.setup() self.index = self.index.refresh() if models is not None: assert isinstance(models, (list, tuple)) try: if models is None: self.delete_index() else: models_to_delete = [] for model in models: models_to_delete.append(u"%s:%s" % (DJANGO_CT, get_model_ct(model))) self.index.delete_by_query(q=self.parser.parse(u" OR ".join(models_to_delete))) except Exception as e: if not self.silently_fail: raise if models is not None: self.log.error("Failed to clear Whoosh index of models '%s': %s", ','.join(models_to_delete), e, exc_info=True) else: self.log.error("Failed to clear Whoosh index: %s", e, exc_info=True) def delete_index(self): # Per the Whoosh mailing list, if wiping out everything from the index, # it's much more efficient to simply delete the index files. if self.use_file_storage and os.path.exists(self.path): shutil.rmtree(self.path) elif not self.use_file_storage: self.storage.clean() # Recreate everything. 
self.setup() def optimize(self): if not self.setup_complete: self.setup() self.index = self.index.refresh() self.index.optimize() def calculate_page(self, start_offset=0, end_offset=None): # Prevent against Whoosh throwing an error. Requires an end_offset # greater than 0. if end_offset is not None and end_offset <= 0: end_offset = 1 # Determine the page. page_num = 0 if end_offset is None: end_offset = 1000000 if start_offset is None: start_offset = 0 page_length = end_offset - start_offset if page_length and page_length > 0: page_num = int(start_offset / page_length) # Increment because Whoosh uses 1-based page numbers. page_num += 1 return page_num, page_length @log_query def search(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None, within=None, dwithin=None, distance_point=None, models=None, limit_to_registered_models=None, result_class=None, **kwargs): if not self.setup_complete: self.setup() # A zero length query should return no results. if len(query_string) == 0: return { 'results': [], 'hits': 0, } query_string = force_text(query_string) # A one-character query (non-wildcard) gets nabbed by a stopwords # filter and should yield zero results. if len(query_string) <= 1 and query_string != u'*': return { 'results': [], 'hits': 0, } reverse = False if sort_by is not None: # Determine if we need to reverse the results and if Whoosh can # handle what it's being asked to sort by. Reversing is an # all-or-nothing action, unfortunately. 
sort_by_list = [] reverse_counter = 0 for order_by in sort_by: if order_by.startswith('-'): reverse_counter += 1 if reverse_counter and reverse_counter != len(sort_by): raise SearchBackendError("Whoosh requires all order_by fields" " to use the same sort direction") for order_by in sort_by: if order_by.startswith('-'): sort_by_list.append(order_by[1:]) if len(sort_by_list) == 1: reverse = True else: sort_by_list.append(order_by) if len(sort_by_list) == 1: reverse = False sort_by = sort_by_list if facets is not None: warnings.warn("Whoosh does not handle faceting.", Warning, stacklevel=2) if date_facets is not None: warnings.warn("Whoosh does not handle date faceting.", Warning, stacklevel=2) if query_facets is not None: warnings.warn("Whoosh does not handle query faceting.", Warning, stacklevel=2) narrowed_results = None self.index = self.index.refresh() if limit_to_registered_models is None: limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) if models and len(models): model_choices = sorted(get_model_ct(model) for model in models) elif limit_to_registered_models: # Using narrow queries, limit the results to only models handled # with the current routers. model_choices = self.build_models_list() else: model_choices = [] if len(model_choices) > 0: if narrow_queries is None: narrow_queries = set() narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices])) narrow_searcher = None if narrow_queries is not None: # Potentially expensive? I don't see another way to do it in Whoosh... 
narrow_searcher = self.index.searcher() for nq in narrow_queries: recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)), limit=None) if len(recent_narrowed_results) <= 0: return { 'results': [], 'hits': 0, } if narrowed_results: narrowed_results.filter(recent_narrowed_results) else: narrowed_results = recent_narrowed_results self.index = self.index.refresh() if self.index.doc_count(): searcher = self.index.searcher() parsed_query = self.parser.parse(query_string) # In the event of an invalid/stopworded query, recover gracefully. if parsed_query is None: return { 'results': [], 'hits': 0, } page_num, page_length = self.calculate_page(start_offset, end_offset) search_kwargs = { 'pagelen': page_length, 'sortedby': sort_by, 'reverse': reverse, } # Handle the case where the results have been narrowed. if narrowed_results is not None: search_kwargs['filter'] = narrowed_results try: raw_page = searcher.search_page( parsed_query, page_num, **search_kwargs ) except ValueError: if not self.silently_fail: raise return { 'results': [], 'hits': 0, 'spelling_suggestion': None, } # Because as of Whoosh 2.5.1, it will return the wrong page of # results if you request something too high. 
:( if raw_page.pagenum < page_num: return { 'results': [], 'hits': 0, 'spelling_suggestion': None, } results = self._process_results(raw_page, highlight=highlight, query_string=query_string, spelling_query=spelling_query, result_class=result_class) searcher.close() if hasattr(narrow_searcher, 'close'): narrow_searcher.close() return results else: if self.include_spelling: if spelling_query: spelling_suggestion = self.create_spelling_suggestion(spelling_query) else: spelling_suggestion = self.create_spelling_suggestion(query_string) else: spelling_suggestion = None return { 'results': [], 'hits': 0, 'spelling_suggestion': spelling_suggestion, } def more_like_this(self, model_instance, additional_query_string=None, start_offset=0, end_offset=None, models=None, limit_to_registered_models=None, result_class=None, **kwargs): if not self.setup_complete: self.setup() field_name = self.content_field_name narrow_queries = set() narrowed_results = None self.index = self.index.refresh() if limit_to_registered_models is None: limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) if models and len(models): model_choices = sorted(get_model_ct(model) for model in models) elif limit_to_registered_models: # Using narrow queries, limit the results to only models handled # with the current routers. model_choices = self.build_models_list() else: model_choices = [] if len(model_choices) > 0: if narrow_queries is None: narrow_queries = set() narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices])) if additional_query_string and additional_query_string != '*': narrow_queries.add(additional_query_string) narrow_searcher = None if narrow_queries is not None: # Potentially expensive? I don't see another way to do it in Whoosh... 
narrow_searcher = self.index.searcher() for nq in narrow_queries: recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)), limit=None) if len(recent_narrowed_results) <= 0: return { 'results': [], 'hits': 0, } if narrowed_results: narrowed_results.filter(recent_narrowed_results) else: narrowed_results = recent_narrowed_results page_num, page_length = self.calculate_page(start_offset, end_offset) self.index = self.index.refresh() raw_results = EmptyResults() searcher = None if self.index.doc_count(): query = "%s:%s" % (ID, get_identifier(model_instance)) searcher = self.index.searcher() parsed_query = self.parser.parse(query) results = searcher.search(parsed_query) if len(results): raw_results = results[0].more_like_this(field_name, top=end_offset) # Handle the case where the results have been narrowed. if narrowed_results is not None and hasattr(raw_results, 'filter'): raw_results.filter(narrowed_results) try: raw_page = ResultsPage(raw_results, page_num, page_length) except ValueError: if not self.silently_fail: raise return { 'results': [], 'hits': 0, 'spelling_suggestion': None, } # Because as of Whoosh 2.5.1, it will return the wrong page of # results if you request something too high. :( if raw_page.pagenum < page_num: return { 'results': [], 'hits': 0, 'spelling_suggestion': None, } results = self._process_results(raw_page, result_class=result_class) if searcher: searcher.close() if hasattr(narrow_searcher, 'close'): narrow_searcher.close() return results def _process_results(self, raw_page, highlight=False, query_string='', spelling_query=None, result_class=None): from haystack import connections results = [] # It's important to grab the hits first before slicing. Otherwise, this # can cause pagination failures. 
hits = len(raw_page) if result_class is None: result_class = SearchResult facets = {} spelling_suggestion = None unified_index = connections[self.connection_alias].get_unified_index() indexed_models = unified_index.get_indexed_models() for doc_offset, raw_result in enumerate(raw_page): score = raw_page.score(doc_offset) or 0 app_label, model_name = raw_result[DJANGO_CT].split('.') additional_fields = {} model = haystack_get_model(app_label, model_name) if model and model in indexed_models: for key, value in raw_result.items(): index = unified_index.get_index(model) string_key = str(key) if string_key in index.fields and hasattr(index.fields[string_key], 'convert'): # Special-cased due to the nature of KEYWORD fields. if index.fields[string_key].is_multivalued: if value is None or len(value) is 0: additional_fields[string_key] = [] else: additional_fields[string_key] = value.split(',') else: additional_fields[string_key] = index.fields[string_key].convert(value) else: additional_fields[string_key] = self._to_python(value) del(additional_fields[DJANGO_CT]) del(additional_fields[DJANGO_ID]) if highlight: sa = StemmingAnalyzer() formatter = WhooshHtmlFormatter('em') terms = [token.text for token in sa(query_string)] whoosh_result = whoosh_highlight( additional_fields.get(self.content_field_name), terms, sa, ContextFragmenter(), formatter ) additional_fields['highlighted'] = { self.content_field_name: [whoosh_result], } result = result_class(app_label, model_name, raw_result[DJANGO_ID], score, **additional_fields) results.append(result) else: hits -= 1 if self.include_spelling: if spelling_query: spelling_suggestion = self.create_spelling_suggestion(spelling_query) else: spelling_suggestion = self.create_spelling_suggestion(query_string) return { 'results': results, 'hits': hits, 'facets': facets, 'spelling_suggestion': spelling_suggestion, } def create_spelling_suggestion(self, query_string): spelling_suggestion = None reader = self.index.reader() corrector = 
reader.corrector(self.content_field_name) cleaned_query = force_text(query_string) if not query_string: return spelling_suggestion # Clean the string. for rev_word in self.RESERVED_WORDS: cleaned_query = cleaned_query.replace(rev_word, '') for rev_char in self.RESERVED_CHARACTERS: cleaned_query = cleaned_query.replace(rev_char, '') # Break it down. query_words = cleaned_query.split() suggested_words = [] for word in query_words: suggestions = corrector.suggest(word, limit=1) if len(suggestions) > 0: suggested_words.append(suggestions[0]) spelling_suggestion = ' '.join(suggested_words) return spelling_suggestion def _from_python(self, value): """ Converts Python values to a string for Whoosh. Code courtesy of pysolr. """ if hasattr(value, 'strftime'): if not hasattr(value, 'hour'): value = datetime(value.year, value.month, value.day, 0, 0, 0) elif isinstance(value, bool): if value: value = 'true' else: value = 'false' elif isinstance(value, (list, tuple)): value = u','.join([force_text(v) for v in value]) elif isinstance(value, (six.integer_types, float)): # Leave it alone. pass else: value = force_text(value) return value def _to_python(self, value): """ Converts values from Whoosh to native Python values. A port of the same method in pysolr, as they deal with data the same way. """ if value == 'true': return True elif value == 'false': return False if value and isinstance(value, six.string_types): possible_datetime = DATETIME_REGEX.search(value) if possible_datetime: date_values = possible_datetime.groupdict() for dk, dv in date_values.items(): date_values[dk] = int(dv) return datetime(date_values['year'], date_values['month'], date_values['day'], date_values['hour'], date_values['minute'], date_values['second']) try: # Attempt to use json to load the values. converted_value = json.loads(value) # Try to handle most built-in types. 
if isinstance(converted_value, (list, tuple, set, dict, six.integer_types, float, complex)): return converted_value except: # If it fails (SyntaxError or its ilk) or we don't trust it, # continue on. pass return value class WhooshSearchQuery(BaseSearchQuery): def _convert_datetime(self, date): if hasattr(date, 'hour'): return force_text(date.strftime('%Y%m%d%H%M%S')) else: return force_text(date.strftime('%Y%m%d000000')) def clean(self, query_fragment): """ Provides a mechanism for sanitizing user input before presenting the value to the backend. Whoosh 1.X differs here in that you can no longer use a backslash to escape reserved characters. Instead, the whole word should be quoted. """ words = query_fragment.split() cleaned_words = [] for word in words: if word in self.backend.RESERVED_WORDS: word = word.replace(word, word.lower()) for char in self.backend.RESERVED_CHARACTERS: if char in word: word = "'%s'" % word break cleaned_words.append(word) return ' '.join(cleaned_words) def build_query_fragment(self, field, filter_type, value): from haystack import connections query_frag = '' is_datetime = False if not hasattr(value, 'input_type_name'): # Handle when we've got a ``ValuesListQuerySet``... if hasattr(value, 'values_list'): value = list(value) if hasattr(value, 'strftime'): is_datetime = True if isinstance(value, six.string_types) and value != ' ': # It's not an ``InputType``. Assume ``Clean``. value = Clean(value) else: value = PythonData(value) # Prepare the query using the InputType. prepared_value = value.prepare(self) if not isinstance(prepared_value, (set, list, tuple)): # Then convert whatever we get back to what pysolr wants if needed. prepared_value = self.backend._from_python(prepared_value) # 'content' is a special reserved word, much like 'pk' in # Django's ORM layer. It indicates 'no special field'. 
if field == 'content': index_fieldname = '' else: index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field) filter_types = { 'content': '%s', 'contains': '*%s*', 'endswith': "*%s", 'startswith': "%s*", 'exact': '%s', 'gt': "{%s to}", 'gte': "[%s to]", 'lt': "{to %s}", 'lte': "[to %s]", 'fuzzy': u'%s~', } if value.post_process is False: query_frag = prepared_value else: if filter_type in ['content', 'contains', 'startswith', 'endswith', 'fuzzy']: if value.input_type_name == 'exact': query_frag = prepared_value else: # Iterate over terms & incorportate the converted form of each into the query. terms = [] if isinstance(prepared_value, six.string_types): possible_values = prepared_value.split(' ') else: if is_datetime is True: prepared_value = self._convert_datetime(prepared_value) possible_values = [prepared_value] for possible_value in possible_values: terms.append(filter_types[filter_type] % self.backend._from_python(possible_value)) if len(terms) == 1: query_frag = terms[0] else: query_frag = u"(%s)" % " AND ".join(terms) elif filter_type == 'in': in_options = [] for possible_value in prepared_value: is_datetime = False if hasattr(possible_value, 'strftime'): is_datetime = True pv = self.backend._from_python(possible_value) if is_datetime is True: pv = self._convert_datetime(pv) if isinstance(pv, six.string_types) and not is_datetime: in_options.append('"%s"' % pv) else: in_options.append('%s' % pv) query_frag = "(%s)" % " OR ".join(in_options) elif filter_type == 'range': start = self.backend._from_python(prepared_value[0]) end = self.backend._from_python(prepared_value[1]) if hasattr(prepared_value[0], 'strftime'): start = self._convert_datetime(start) if hasattr(prepared_value[1], 'strftime'): end = self._convert_datetime(end) query_frag = u"[%s to %s]" % (start, end) elif filter_type == 'exact': if value.input_type_name == 'exact': query_frag = prepared_value else: prepared_value = Exact(prepared_value).prepare(self) 
query_frag = filter_types[filter_type] % prepared_value else: if is_datetime is True: prepared_value = self._convert_datetime(prepared_value) query_frag = filter_types[filter_type] % prepared_value if len(query_frag) and not isinstance(value, Raw): if not query_frag.startswith('(') and not query_frag.endswith(')'): query_frag = "(%s)" % query_frag return u"%s%s" % (index_fieldname, query_frag) class WhooshEngine(BaseEngine): backend = WhooshSearchBackend query = WhooshSearchQuery django-haystack-2.8.0/haystack/constants.py000066400000000000000000000025731325051407000207710ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf import settings DEFAULT_ALIAS = 'default' # Reserved field names ID = getattr(settings, 'HAYSTACK_ID_FIELD', 'id') DJANGO_CT = getattr(settings, 'HAYSTACK_DJANGO_CT_FIELD', 'django_ct') DJANGO_ID = getattr(settings, 'HAYSTACK_DJANGO_ID_FIELD', 'django_id') DOCUMENT_FIELD = getattr(settings, 'HAYSTACK_DOCUMENT_FIELD', 'text') # Default operator. Valid options are AND/OR. DEFAULT_OPERATOR = getattr(settings, 'HAYSTACK_DEFAULT_OPERATOR', 'AND') # Default values on elasticsearch FUZZY_MIN_SIM = getattr(settings, 'HAYSTACK_FUZZY_MIN_SIM', 0.5) FUZZY_MAX_EXPANSIONS = getattr(settings, 'HAYSTACK_FUZZY_MAX_EXPANSIONS', 50) # Valid expression extensions. VALID_FILTERS = set(['contains', 'exact', 'gt', 'gte', 'lt', 'lte', 'in', 'startswith', 'range', 'endswith', 'content', 'fuzzy']) FILTER_SEPARATOR = '__' # The maximum number of items to display in a SearchQuerySet.__repr__ REPR_OUTPUT_SIZE = 20 # Number of SearchResults to load at a time. ITERATOR_LOAD_PER_QUERY = getattr(settings, 'HAYSTACK_ITERATOR_LOAD_PER_QUERY', 10) # A marker class in the hierarchy to indicate that it handles search data. class Indexable(object): haystack_use_for_indexing = True # For the geo bits, since that's what Solr & Elasticsearch seem to silently # assume... 
WGS_84_SRID = 4326 django-haystack-2.8.0/haystack/exceptions.py000066400000000000000000000023211325051407000211250ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals class HaystackError(Exception): """A generic exception for all others to extend.""" pass class SearchBackendError(HaystackError): """Raised when a backend can not be found.""" pass class SearchFieldError(HaystackError): """Raised when a field encounters an error.""" pass class MissingDependency(HaystackError): """Raised when a library a backend depends on can not be found.""" pass class NotHandled(HaystackError): """Raised when a model is not handled by the router setup.""" pass class MoreLikeThisError(HaystackError): """Raised when a model instance has not been provided for More Like This.""" pass class FacetingError(HaystackError): """Raised when incorrect arguments have been provided for faceting.""" pass class SpatialError(HaystackError): """Raised when incorrect arguments have been provided for spatial.""" pass class StatsError(HaystackError): "Raised when incorrect arguments have been provided for stats" pass class SkipDocument(HaystackError): """Raised when a document should be skipped while updating""" pass django-haystack-2.8.0/haystack/fields.py000066400000000000000000000356131325051407000202240ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import re from django.template import loader from django.utils import datetime_safe, six from haystack.exceptions import SearchFieldError from haystack.utils import get_model_ct_tuple from inspect import ismethod class NOT_PROVIDED: pass # Note that dates in the full ISO 8601 format will be accepted as long as the hour/minute/second components # are zeroed for compatibility with search backends which lack a date time distinct from datetime: DATE_REGEX = 
re.compile(r'^(?P\d{4})-(?P\d{2})-(?P\d{2})(?:|T00:00:00Z?)$') DATETIME_REGEX = re.compile(r'^(?P\d{4})-(?P\d{2})-(?P\d{2})(T|\s+)(?P\d{2}):(?P\d{2}):(?P\d{2}).*?$') # All the SearchFields variants. class SearchField(object): """The base implementation of a search field.""" field_type = None def __init__(self, model_attr=None, use_template=False, template_name=None, document=False, indexed=True, stored=True, faceted=False, default=NOT_PROVIDED, null=False, index_fieldname=None, facet_class=None, boost=1.0, weight=None): # Track what the index thinks this field is called. self.instance_name = None self.model_attr = model_attr self.use_template = use_template self.template_name = template_name self.document = document self.indexed = indexed self.stored = stored self.faceted = faceted self._default = default self.null = null self.index_fieldname = index_fieldname self.boost = weight or boost self.is_multivalued = False # We supply the facet_class for making it easy to create a faceted # field based off of this field. self.facet_class = facet_class if self.facet_class is None: self.facet_class = FacetCharField self.set_instance_name(None) def set_instance_name(self, instance_name): self.instance_name = instance_name if self.index_fieldname is None: self.index_fieldname = self.instance_name def has_default(self): """Returns a boolean of whether this field has a default value.""" return self._default is not NOT_PROVIDED @property def default(self): """Returns the default value for the field.""" if callable(self._default): return self._default() return self._default def prepare(self, obj): """ Takes data from the provided object and prepares it for storage in the index. """ # Give priority to a template. 
if self.use_template: return self.prepare_template(obj) elif self.model_attr is not None: attrs = self.split_model_attr_lookups() current_objects = [obj] values = self.resolve_attributes_lookup(current_objects, attrs) if len(values) == 1: return values[0] elif len(values) > 1: return values if self.has_default(): return self.default else: return None def resolve_attributes_lookup(self, current_objects, attributes): """ Recursive method that looks, for one or more objects, for an attribute that can be multiple objects (relations) deep. """ values = [] for current_object in current_objects: if not hasattr(current_object, attributes[0]): raise SearchFieldError( "The model '%s' does not have a model_attr '%s'." % (repr(current_object), attributes[0]) ) if len(attributes) > 1: current_objects_in_attr = self.get_iterable_objects(getattr(current_object, attributes[0])) return self.resolve_attributes_lookup(current_objects_in_attr, attributes[1:]) current_object = getattr(current_object, attributes[0]) if current_object is None: if self.has_default(): current_object = self._default elif self.null: current_object = None else: raise SearchFieldError( "The model '%s' combined with model_attr '%s' returned None, but doesn't allow " "a default or null value." % (repr(current_object), self.model_attr) ) if callable(current_object): values.append(current_object()) else: values.append(current_object) return values def split_model_attr_lookups(self): """Returns list of nested attributes for looking through the relation.""" return self.model_attr.split('__') @classmethod def get_iterable_objects(cls, current_objects): """ Returns iterable of objects that contain data. For example, resolves Django ManyToMany relationship so the attributes of the related models can then be accessed. 
""" if current_objects is None: return [] if hasattr(current_objects, 'all'): # i.e, Django ManyToMany relationships if ismethod(current_objects.all): return current_objects.all() return [] elif not hasattr(current_objects, '__iter__'): current_objects = [current_objects] return current_objects def prepare_template(self, obj): """ Flattens an object for indexing. This loads a template (``search/indexes/{app_label}/{model_name}_{field_name}.txt``) and returns the result of rendering that template. ``object`` will be in its context. """ if self.instance_name is None and self.template_name is None: raise SearchFieldError("This field requires either its instance_name variable to be populated or an explicit template_name in order to load the correct template.") if self.template_name is not None: template_names = self.template_name if not isinstance(template_names, (list, tuple)): template_names = [template_names] else: app_label, model_name = get_model_ct_tuple(obj) template_names = ['search/indexes/%s/%s_%s.txt' % (app_label, model_name, self.instance_name)] t = loader.select_template(template_names) return t.render({'object': obj}) def convert(self, value): """ Handles conversion between the data found and the type of the field. Extending classes should override this method and provide correct data coercion. 
""" return value class CharField(SearchField): field_type = 'string' def __init__(self, **kwargs): if kwargs.get('facet_class') is None: kwargs['facet_class'] = FacetCharField super(CharField, self).__init__(**kwargs) def prepare(self, obj): return self.convert(super(CharField, self).prepare(obj)) def convert(self, value): if value is None: return None return six.text_type(value) class LocationField(SearchField): field_type = 'location' def prepare(self, obj): from haystack.utils.geo import ensure_point value = super(LocationField, self).prepare(obj) if value is None: return None pnt = ensure_point(value) pnt_lng, pnt_lat = pnt.coords return "%s,%s" % (pnt_lat, pnt_lng) def convert(self, value): from haystack.utils.geo import ensure_point, Point if value is None: return None if hasattr(value, 'geom_type'): value = ensure_point(value) return value if isinstance(value, six.string_types): lat, lng = value.split(',') elif isinstance(value, (list, tuple)): # GeoJSON-alike lat, lng = value[1], value[0] elif isinstance(value, dict): lat = value.get('lat', 0) lng = value.get('lon', 0) else: raise TypeError('Unable to extract coordinates from %r' % value) value = Point(float(lng), float(lat)) return value class NgramField(CharField): field_type = 'ngram' def __init__(self, **kwargs): if kwargs.get('faceted') is True: raise SearchFieldError("%s can not be faceted." 
% self.__class__.__name__) super(NgramField, self).__init__(**kwargs) class EdgeNgramField(NgramField): field_type = 'edge_ngram' class IntegerField(SearchField): field_type = 'integer' def __init__(self, **kwargs): if kwargs.get('facet_class') is None: kwargs['facet_class'] = FacetIntegerField super(IntegerField, self).__init__(**kwargs) def prepare(self, obj): return self.convert(super(IntegerField, self).prepare(obj)) def convert(self, value): if value is None: return None return int(value) class FloatField(SearchField): field_type = 'float' def __init__(self, **kwargs): if kwargs.get('facet_class') is None: kwargs['facet_class'] = FacetFloatField super(FloatField, self).__init__(**kwargs) def prepare(self, obj): return self.convert(super(FloatField, self).prepare(obj)) def convert(self, value): if value is None: return None return float(value) class DecimalField(SearchField): field_type = 'string' def __init__(self, **kwargs): if kwargs.get('facet_class') is None: kwargs['facet_class'] = FacetDecimalField super(DecimalField, self).__init__(**kwargs) def prepare(self, obj): return self.convert(super(DecimalField, self).prepare(obj)) def convert(self, value): if value is None: return None return six.text_type(value) class BooleanField(SearchField): field_type = 'boolean' def __init__(self, **kwargs): if kwargs.get('facet_class') is None: kwargs['facet_class'] = FacetBooleanField super(BooleanField, self).__init__(**kwargs) def prepare(self, obj): return self.convert(super(BooleanField, self).prepare(obj)) def convert(self, value): if value is None: return None return bool(value) class DateField(SearchField): field_type = 'date' def __init__(self, **kwargs): if kwargs.get('facet_class') is None: kwargs['facet_class'] = FacetDateField super(DateField, self).__init__(**kwargs) def prepare(self, obj): return self.convert(super(DateField, self).prepare(obj)) def convert(self, value): if value is None: return None if isinstance(value, six.string_types): match = 
DATE_REGEX.search(value) if match: data = match.groupdict() return datetime_safe.date(int(data['year']), int(data['month']), int(data['day'])) else: raise SearchFieldError("Date provided to '%s' field doesn't appear to be a valid date string: '%s'" % (self.instance_name, value)) return value class DateTimeField(SearchField): field_type = 'datetime' def __init__(self, **kwargs): if kwargs.get('facet_class') is None: kwargs['facet_class'] = FacetDateTimeField super(DateTimeField, self).__init__(**kwargs) def prepare(self, obj): return self.convert(super(DateTimeField, self).prepare(obj)) def convert(self, value): if value is None: return None if isinstance(value, six.string_types): match = DATETIME_REGEX.search(value) if match: data = match.groupdict() return datetime_safe.datetime(int(data['year']), int(data['month']), int(data['day']), int(data['hour']), int(data['minute']), int(data['second'])) else: raise SearchFieldError("Datetime provided to '%s' field doesn't appear to be a valid datetime string: '%s'" % (self.instance_name, value)) return value class MultiValueField(SearchField): field_type = 'string' def __init__(self, **kwargs): if kwargs.get('facet_class') is None: kwargs['facet_class'] = FacetMultiValueField if kwargs.get('use_template') is True: raise SearchFieldError("'%s' fields can not use templates to prepare their data." % self.__class__.__name__) super(MultiValueField, self).__init__(**kwargs) self.is_multivalued = True def prepare(self, obj): return self.convert(super(MultiValueField, self).prepare(obj)) def convert(self, value): if value is None: return None if hasattr(value, '__iter__') and not isinstance(value, six.text_type): return value return [value] class FacetField(SearchField): """ ``FacetField`` is slightly different than the other fields because it can work in conjunction with other fields as its data source. 
Accepts an optional ``facet_for`` kwarg, which should be the field name (not ``index_fieldname``) of the field it should pull data from. """ instance_name = None def __init__(self, **kwargs): handled_kwargs = self.handle_facet_parameters(kwargs) super(FacetField, self).__init__(**handled_kwargs) def handle_facet_parameters(self, kwargs): if kwargs.get('faceted', False): raise SearchFieldError("FacetField (%s) does not accept the 'faceted' argument." % self.instance_name) if not kwargs.get('null', True): raise SearchFieldError("FacetField (%s) does not accept False for the 'null' argument." % self.instance_name) if not kwargs.get('indexed', True): raise SearchFieldError("FacetField (%s) does not accept False for the 'indexed' argument." % self.instance_name) if kwargs.get('facet_class'): raise SearchFieldError("FacetField (%s) does not accept the 'facet_class' argument." % self.instance_name) self.facet_for = None self.facet_class = None # Make sure the field is nullable. kwargs['null'] = True if 'facet_for' in kwargs: self.facet_for = kwargs['facet_for'] del(kwargs['facet_for']) return kwargs def get_facet_for_name(self): return self.facet_for or self.instance_name class FacetCharField(FacetField, CharField): pass class FacetIntegerField(FacetField, IntegerField): pass class FacetFloatField(FacetField, FloatField): pass class FacetDecimalField(FacetField, DecimalField): pass class FacetBooleanField(FacetField, BooleanField): pass class FacetDateField(FacetField, DateField): pass class FacetDateTimeField(FacetField, DateTimeField): pass class FacetMultiValueField(FacetField, MultiValueField): pass django-haystack-2.8.0/haystack/forms.py000066400000000000000000000101731325051407000200760ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django import forms from django.utils.encoding import smart_text from django.utils.text import capfirst from django.utils.translation import 
ugettext_lazy as _ from haystack import connections from haystack.constants import DEFAULT_ALIAS from haystack.query import EmptySearchQuerySet, SearchQuerySet from haystack.utils import get_model_ct from haystack.utils.app_loading import haystack_get_model def model_choices(using=DEFAULT_ALIAS): choices = [(get_model_ct(m), capfirst(smart_text(m._meta.verbose_name_plural))) for m in connections[using].get_unified_index().get_indexed_models()] return sorted(choices, key=lambda x: x[1]) class SearchForm(forms.Form): q = forms.CharField(required=False, label=_('Search'), widget=forms.TextInput(attrs={'type': 'search'})) def __init__(self, *args, **kwargs): self.searchqueryset = kwargs.pop('searchqueryset', None) self.load_all = kwargs.pop('load_all', False) if self.searchqueryset is None: self.searchqueryset = SearchQuerySet() super(SearchForm, self).__init__(*args, **kwargs) def no_query_found(self): """ Determines the behavior when no query was found. By default, no results are returned (``EmptySearchQuerySet``). Should you want to show all results, override this method in your own ``SearchForm`` subclass and do ``return self.searchqueryset.all()``. 
""" return EmptySearchQuerySet() def search(self): if not self.is_valid(): return self.no_query_found() if not self.cleaned_data.get('q'): return self.no_query_found() sqs = self.searchqueryset.auto_query(self.cleaned_data['q']) if self.load_all: sqs = sqs.load_all() return sqs def get_suggestion(self): if not self.is_valid(): return None return self.searchqueryset.spelling_suggestion(self.cleaned_data['q']) class HighlightedSearchForm(SearchForm): def search(self): return super(HighlightedSearchForm, self).search().highlight() class FacetedSearchForm(SearchForm): def __init__(self, *args, **kwargs): self.selected_facets = kwargs.pop("selected_facets", []) super(FacetedSearchForm, self).__init__(*args, **kwargs) def search(self): sqs = super(FacetedSearchForm, self).search() # We need to process each facet to ensure that the field name and the # value are quoted correctly and separately: for facet in self.selected_facets: if ":" not in facet: continue field, value = facet.split(":", 1) if value: sqs = sqs.narrow(u'%s:"%s"' % (field, sqs.query.clean(value))) return sqs class ModelSearchForm(SearchForm): def __init__(self, *args, **kwargs): super(ModelSearchForm, self).__init__(*args, **kwargs) self.fields['models'] = forms.MultipleChoiceField(choices=model_choices(), required=False, label=_('Search In'), widget=forms.CheckboxSelectMultiple) def get_models(self): """Return a list of the selected models.""" search_models = [] if self.is_valid(): for model in self.cleaned_data['models']: search_models.append(haystack_get_model(*model.split('.'))) return search_models def search(self): sqs = super(ModelSearchForm, self).search() return sqs.models(*self.get_models()) class HighlightedModelSearchForm(ModelSearchForm): def search(self): return super(HighlightedModelSearchForm, self).search().highlight() class FacetedModelSearchForm(ModelSearchForm): selected_facets = forms.CharField(required=False, widget=forms.HiddenInput) def search(self): sqs = 
super(FacetedModelSearchForm, self).search() if hasattr(self, 'cleaned_data') and self.cleaned_data['selected_facets']: sqs = sqs.narrow(self.cleaned_data['selected_facets']) return sqs.models(*self.get_models()) django-haystack-2.8.0/haystack/generic_views.py000066400000000000000000000076161325051407000216110ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf import settings from django.core.paginator import Paginator from django.views.generic import FormView from django.views.generic.edit import FormMixin from django.views.generic.list import MultipleObjectMixin from .forms import FacetedSearchForm, ModelSearchForm from .query import SearchQuerySet RESULTS_PER_PAGE = getattr(settings, 'HAYSTACK_SEARCH_RESULTS_PER_PAGE', 20) class SearchMixin(MultipleObjectMixin, FormMixin): """ A mixin that allows adding in Haystacks search functionality into another view class. This mixin exhibits similar end functionality as the base Haystack search view, but with some important distinctions oriented around greater compatibility with Django's built-in class based views and mixins. Normal flow: self.request = request self.form = self.build_form() self.query = self.get_query() self.results = self.get_results() return self.create_response() This mixin should: 1. Make the form 2. Get the queryset 3. Return the paginated queryset """ template_name = 'search/search.html' load_all = True form_class = ModelSearchForm queryset = SearchQuerySet() context_object_name = None paginate_by = RESULTS_PER_PAGE paginate_orphans = 0 paginator_class = Paginator page_kwarg = 'page' form_name = 'form' search_field = 'q' object_list = None def get_form_kwargs(self): """ Returns the keyword arguments for instantiating the form. 
""" kwargs = {'initial': self.get_initial()} if self.request.method == 'GET': kwargs.update({ 'data': self.request.GET, }) kwargs.update({ 'searchqueryset': self.get_queryset(), 'load_all': self.load_all, }) return kwargs def form_invalid(self, form): context = self.get_context_data(**{ self.form_name: form, 'object_list': self.get_queryset() }) return self.render_to_response(context) def form_valid(self, form): self.queryset = form.search() context = self.get_context_data(**{ self.form_name: form, 'query': form.cleaned_data.get(self.search_field), 'object_list': self.queryset }) return self.render_to_response(context) class FacetedSearchMixin(SearchMixin): """ A mixin that allows adding in a Haystack search functionality with search faceting. """ form_class = FacetedSearchForm facet_fields = None def get_form_kwargs(self): kwargs = super(FacetedSearchMixin, self).get_form_kwargs() kwargs.update({ 'selected_facets': self.request.GET.getlist("selected_facets") }) return kwargs def get_context_data(self, **kwargs): context = super(FacetedSearchMixin, self).get_context_data(**kwargs) context.update({'facets': self.queryset.facet_counts()}) return context def get_queryset(self): qs = super(FacetedSearchMixin, self).get_queryset() for field in self.facet_fields: qs = qs.facet(field) return qs class SearchView(SearchMixin, FormView): """A view class for searching a Haystack managed search index""" def get(self, request, *args, **kwargs): """ Handles GET requests and instantiates a blank version of the form. 
""" form_class = self.get_form_class() form = self.get_form(form_class) if form.is_valid(): return self.form_valid(form) else: return self.form_invalid(form) class FacetedSearchView(FacetedSearchMixin, SearchView): """ A view class for searching a Haystack managed search index with facets """ pass django-haystack-2.8.0/haystack/indexes.py000066400000000000000000000427371325051407000204220ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import copy import threading import warnings from django.core.exceptions import ImproperlyConfigured from django.utils.encoding import force_text from django.utils.six import with_metaclass from haystack import connection_router, connections from haystack.constants import DEFAULT_ALIAS, DJANGO_CT, DJANGO_ID, ID, Indexable from haystack.fields import * from haystack.manager import SearchIndexManager from haystack.utils import get_facet_field_name, get_identifier, get_model_ct class DeclarativeMetaclass(type): def __new__(cls, name, bases, attrs): attrs['fields'] = {} # Inherit any fields from parent(s). try: parents = [b for b in bases if issubclass(b, SearchIndex)] # Simulate the MRO. parents.reverse() for p in parents: fields = getattr(p, 'fields', None) if fields: attrs['fields'].update(fields) except NameError: pass # Build a dictionary of faceted fields for cross-referencing. facet_fields = {} for field_name, obj in attrs.items(): # Only need to check the FacetFields. if hasattr(obj, 'facet_for'): if not obj.facet_for in facet_fields: facet_fields[obj.facet_for] = [] facet_fields[obj.facet_for].append(field_name) built_fields = {} for field_name, obj in attrs.items(): if isinstance(obj, SearchField): field = attrs[field_name] field.set_instance_name(field_name) built_fields[field_name] = field # Only check non-faceted fields for the following info. 
if not hasattr(field, 'facet_for'): if field.faceted == True: # If no other field is claiming this field as # ``facet_for``, create a shadow ``FacetField``. if not field_name in facet_fields: shadow_facet_name = get_facet_field_name(field_name) shadow_facet_field = field.facet_class(facet_for=field_name) shadow_facet_field.set_instance_name(shadow_facet_name) built_fields[shadow_facet_name] = shadow_facet_field attrs['fields'].update(built_fields) # Assigning default 'objects' query manager if it does not already exist if not 'objects' in attrs: try: attrs['objects'] = SearchIndexManager(attrs['Meta'].index_label) except (KeyError, AttributeError): attrs['objects'] = SearchIndexManager(DEFAULT_ALIAS) return super(DeclarativeMetaclass, cls).__new__(cls, name, bases, attrs) class SearchIndex(with_metaclass(DeclarativeMetaclass, threading.local)): """ Base class for building indexes. An example might look like this:: import datetime from haystack import indexes from myapp.models import Note class NoteIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='user') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return Note def index_queryset(self, using=None): return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now()) """ def __init__(self): self.prepared_data = None content_fields = [] self.field_map = dict() for field_name, field in self.fields.items(): #form field map self.field_map[field.index_fieldname] = field_name if field.document is True: content_fields.append(field_name) if not len(content_fields) == 1: raise SearchFieldError("The index '%s' must have one (and only one) SearchField with document=True." % self.__class__.__name__) def get_model(self): """ Should return the ``Model`` class (not an instance) that the rest of the ``SearchIndex`` should use. 
This method is required & you must override it to return the correct class. """ raise NotImplementedError("You must provide a 'get_model' method for the '%r' index." % self) def index_queryset(self, using=None): """ Get the default QuerySet to index when doing a full update. Subclasses can override this method to avoid indexing certain objects. """ return self.get_model()._default_manager.all() def read_queryset(self, using=None): """ Get the default QuerySet for read actions. Subclasses can override this method to work with other managers. Useful when working with default managers that filter some objects. """ return self.index_queryset(using=using) def build_queryset(self, using=None, start_date=None, end_date=None): """ Get the default QuerySet to index when doing an index update. Subclasses can override this method to take into account related model modification times. The default is to use ``SearchIndex.index_queryset`` and filter based on ``SearchIndex.get_updated_field`` """ extra_lookup_kwargs = {} model = self.get_model() updated_field = self.get_updated_field() update_field_msg = ("No updated date field found for '%s' " "- not restricting by age.") % model.__name__ if start_date: if updated_field: extra_lookup_kwargs['%s__gte' % updated_field] = start_date else: warnings.warn(update_field_msg) if end_date: if updated_field: extra_lookup_kwargs['%s__lte' % updated_field] = end_date else: warnings.warn(update_field_msg) index_qs = None if hasattr(self, 'get_queryset'): warnings.warn("'SearchIndex.get_queryset' was deprecated in Haystack v2. Please rename the method 'index_queryset'.") index_qs = self.get_queryset() else: index_qs = self.index_queryset(using=using) if not hasattr(index_qs, 'filter'): raise ImproperlyConfigured("The '%r' class must return a 'QuerySet' in the 'index_queryset' method." % self) # `.select_related()` seems like a good idea here but can fail on # nullable `ForeignKey` as well as what seems like other cases. 
return index_qs.filter(**extra_lookup_kwargs).order_by(model._meta.pk.name) def prepare(self, obj): """ Fetches and adds/alters data before indexing. """ self.prepared_data = { ID: get_identifier(obj), DJANGO_CT: get_model_ct(obj), DJANGO_ID: force_text(obj.pk), } for field_name, field in self.fields.items(): # Use the possibly overridden name, which will default to the # variable name of the field. self.prepared_data[field.index_fieldname] = field.prepare(obj) if hasattr(self, "prepare_%s" % field_name): value = getattr(self, "prepare_%s" % field_name)(obj) self.prepared_data[field.index_fieldname] = value return self.prepared_data def full_prepare(self, obj): self.prepared_data = self.prepare(obj) for field_name, field in self.fields.items(): # Duplicate data for faceted fields. if getattr(field, 'facet_for', None): source_field_name = self.fields[field.facet_for].index_fieldname # If there's data there, leave it alone. Otherwise, populate it # with whatever the related field has. if self.prepared_data[field_name] is None and source_field_name in self.prepared_data: self.prepared_data[field.index_fieldname] = self.prepared_data[source_field_name] # Remove any fields that lack a value and are ``null=True``. 
if field.null is True: if self.prepared_data[field.index_fieldname] is None: del(self.prepared_data[field.index_fieldname]) return self.prepared_data def get_content_field(self): """Returns the field that supplies the primary document to be indexed.""" for field_name, field in self.fields.items(): if field.document is True: return field.index_fieldname def get_field_weights(self): """Returns a dict of fields with weight values""" weights = {} for field_name, field in self.fields.items(): if field.boost: weights[field_name] = field.boost return weights def _get_backend(self, using): warnings.warn('SearchIndex._get_backend is deprecated; use SearchIndex.get_backend instead', DeprecationWarning) return self.get_backend(using) def get_backend(self, using=None): if using is None: try: using = connection_router.for_write(index=self)[0] except IndexError: # There's no backend to handle it. Bomb out. return None return connections[using].get_backend() def update(self, using=None): """ Updates the entire index. If ``using`` is provided, it specifies which connection should be used. Default relies on the routers to decide which backend should be used. """ backend = self.get_backend(using) if backend is not None: backend.update(self, self.index_queryset(using=using)) def update_object(self, instance, using=None, **kwargs): """ Update the index for a single object. Attached to the class's post-save hook. If ``using`` is provided, it specifies which connection should be used. Default relies on the routers to decide which backend should be used. """ # Check to make sure we want to index this first. if self.should_update(instance, **kwargs): backend = self.get_backend(using) if backend is not None: backend.update(self, [instance]) def remove_object(self, instance, using=None, **kwargs): """ Remove an object from the index. Attached to the class's post-delete hook. If ``using`` is provided, it specifies which connection should be used. 
Default relies on the routers to decide which backend should be used. """ backend = self.get_backend(using) if backend is not None: backend.remove(instance, **kwargs) def clear(self, using=None): """ Clears the entire index. If ``using`` is provided, it specifies which connection should be used. Default relies on the routers to decide which backend should be used. """ backend = self.get_backend(using) if backend is not None: backend.clear(models=[self.get_model()]) def reindex(self, using=None): """ Completely clear the index for this model and rebuild it. If ``using`` is provided, it specifies which connection should be used. Default relies on the routers to decide which backend should be used. """ self.clear(using=using) self.update(using=using) def get_updated_field(self): """ Get the field name that represents the updated date for the model. If specified, this is used by the reindex command to filter out results from the QuerySet, enabling you to reindex only recent records. This method should either return None (reindex everything always) or a string of the Model's DateField/DateTimeField name. """ return None def should_update(self, instance, **kwargs): """ Determine if an object should be updated in the index. It's useful to override this when an object may save frequently and cause excessive reindexing. You should check conditions on the instance and return False if it is not to be indexed. By default, returns True (always reindex). """ return True def load_all_queryset(self): """ Provides the ability to override how objects get loaded in conjunction with ``SearchQuerySet.load_all``. This is useful for post-processing the results from the query, enabling things like adding ``select_related`` or filtering certain data. By default, returns ``all()`` on the model's default manager. 
""" return self.get_model()._default_manager.all() class BasicSearchIndex(SearchIndex): text = CharField(document=True, use_template=True) # End SearchIndexes # Begin ModelSearchIndexes def index_field_from_django_field(f, default=CharField): """ Returns the Haystack field type that would likely be associated with each Django type. """ result = default if f.get_internal_type() in ('DateField', 'DateTimeField'): result = DateTimeField elif f.get_internal_type() in ('BooleanField', 'NullBooleanField'): result = BooleanField elif f.get_internal_type() in ('CommaSeparatedIntegerField',): result = MultiValueField elif f.get_internal_type() in ('DecimalField', 'FloatField'): result = FloatField elif f.get_internal_type() in ('IntegerField', 'PositiveIntegerField', 'PositiveSmallIntegerField', 'SmallIntegerField'): result = IntegerField return result class ModelSearchIndex(SearchIndex): """ Introspects the model assigned to it and generates a `SearchIndex` based on the fields of that model. In addition, it adds a `text` field that is the `document=True` field and has `use_template=True` option set, just like the `BasicSearchIndex`. Usage of this class might result in inferior `SearchIndex` objects, which can directly affect your search results. Use this to establish basic functionality and move to custom `SearchIndex` objects for better control. At this time, it does not handle related fields. """ text = CharField(document=True, use_template=True) # list of reserved field names fields_to_skip = (ID, DJANGO_CT, DJANGO_ID, 'content', 'text') def __init__(self, extra_field_kwargs=None): super(ModelSearchIndex, self).__init__() self.model = None self.prepared_data = None content_fields = [] self.extra_field_kwargs = extra_field_kwargs or {} # Introspect the model, adding/removing fields as needed. # Adds/Excludes should happen only if the fields are not already # defined in `self.fields`. 
self._meta = getattr(self, 'Meta', None) if self._meta: self.model = getattr(self._meta, 'model', None) fields = getattr(self._meta, 'fields', []) excludes = getattr(self._meta, 'excludes', []) # Add in the new fields. self.fields.update(self.get_fields(fields, excludes)) for field_name, field in self.fields.items(): if field.document is True: content_fields.append(field_name) if not len(content_fields) == 1: raise SearchFieldError("The index '%s' must have one (and only one) SearchField with document=True." % self.__class__.__name__) def should_skip_field(self, field): """ Given a Django model field, return if it should be included in the contributed SearchFields. """ # Skip fields in skip list if field.name in self.fields_to_skip: return True # Ignore certain fields (AutoField, related fields). if field.primary_key or field.is_relation: return True return False def get_model(self): return self.model def get_index_fieldname(self, f): """ Given a Django field, return the appropriate index fieldname. """ return f.name def get_fields(self, fields=None, excludes=None): """ Given any explicit fields to include and fields to exclude, add additional fields based on the associated model. 
""" final_fields = {} fields = fields or [] excludes = excludes or [] for f in self.model._meta.fields: # If the field name is already present, skip if f.name in self.fields: continue # If field is not present in explicit field listing, skip if fields and f.name not in fields: continue # If field is in exclude list, skip if excludes and f.name in excludes: continue if self.should_skip_field(f): continue index_field_class = index_field_from_django_field(f) kwargs = copy.copy(self.extra_field_kwargs) kwargs.update({ 'model_attr': f.name, }) if f.null is True: kwargs['null'] = True if f.has_default(): kwargs['default'] = f.default final_fields[f.name] = index_field_class(**kwargs) final_fields[f.name].set_instance_name(self.get_index_fieldname(f)) return final_fields django-haystack-2.8.0/haystack/inputs.py000066400000000000000000000106341325051407000202740ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import re import warnings from django.utils.encoding import force_text, python_2_unicode_compatible @python_2_unicode_compatible class BaseInput(object): """ The base input type. Doesn't do much. You want ``Raw`` instead. """ input_type_name = 'base' post_process = True def __init__(self, query_string, **kwargs): self.query_string = query_string self.kwargs = kwargs def __repr__(self): return u"<%s '%s'>" % (self.__class__.__name__, self) def __str__(self): return force_text(self.query_string) def prepare(self, query_obj): return self.query_string class Raw(BaseInput): """ An input type for passing a query directly to the backend. Prone to not being very portable. """ input_type_name = 'raw' post_process = False class PythonData(BaseInput): """ Represents a bare Python non-string type. Largely only for internal use. """ input_type_name = 'python_data' class Clean(BaseInput): """ An input type for sanitizing user/untrusted input. 
""" input_type_name = 'clean' def prepare(self, query_obj): query_string = super(Clean, self).prepare(query_obj) return query_obj.clean(query_string) class Exact(BaseInput): """ An input type for making exact matches. """ input_type_name = 'exact' def prepare(self, query_obj): query_string = super(Exact, self).prepare(query_obj) if self.kwargs.get('clean', False): # We need to clean each part of the exact match. exact_bits = [Clean(bit).prepare(query_obj) for bit in query_string.split(' ') if bit] query_string = u' '.join(exact_bits) return query_obj.build_exact_query(query_string) class Not(Clean): """ An input type for negating a query. """ input_type_name = 'not' def prepare(self, query_obj): query_string = super(Not, self).prepare(query_obj) return query_obj.build_not_query(query_string) class AutoQuery(BaseInput): """ A convenience class that handles common user queries. In addition to cleaning all tokens, it handles double quote bits as exact matches & terms with '-' in front as NOT queries. """ input_type_name = 'auto_query' post_process = False exact_match_re = re.compile(r'"(?P.*?)"') def prepare(self, query_obj): query_string = super(AutoQuery, self).prepare(query_obj) exacts = self.exact_match_re.findall(query_string) tokens = [] query_bits = [] for rough_token in self.exact_match_re.split(query_string): if not rough_token: continue elif not rough_token in exacts: # We have something that's not an exact match but may have more # than on word in it. tokens.extend(rough_token.split(' ')) else: tokens.append(rough_token) for token in tokens: if not token: continue if token in exacts: query_bits.append(Exact(token, clean=True).prepare(query_obj)) elif token.startswith('-') and len(token) > 1: # This might break Xapian. Check on this. 
query_bits.append(Not(token[1:]).prepare(query_obj)) else: query_bits.append(Clean(token).prepare(query_obj)) return u' '.join(query_bits) class AltParser(BaseInput): """ If the engine supports it, this input type allows for submitting a query that uses a different parser. """ input_type_name = 'alt_parser' post_process = False use_parens = False def __init__(self, parser_name, query_string='', **kwargs): self.parser_name = parser_name self.query_string = query_string self.kwargs = kwargs def __repr__(self): return u"<%s '%s' '%s' '%s'>" % (self.__class__.__name__, self.parser_name, self.query_string, self.kwargs) def prepare(self, query_obj): if not hasattr(query_obj, 'build_alt_parser_query'): warnings.warn("Use of 'AltParser' input type is being ignored, as the '%s' backend doesn't support them." % query_obj) return '' return query_obj.build_alt_parser_query(self.parser_name, self.query_string, **self.kwargs) django-haystack-2.8.0/haystack/management/000077500000000000000000000000001325051407000205105ustar00rootroot00000000000000django-haystack-2.8.0/haystack/management/__init__.py000066400000000000000000000000001325051407000226070ustar00rootroot00000000000000django-haystack-2.8.0/haystack/management/commands/000077500000000000000000000000001325051407000223115ustar00rootroot00000000000000django-haystack-2.8.0/haystack/management/commands/__init__.py000066400000000000000000000000001325051407000244100ustar00rootroot00000000000000django-haystack-2.8.0/haystack/management/commands/build_solr_schema.py000066400000000000000000000153021325051407000263420ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import os import requests from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.core.management.base import BaseCommand, CommandError from django.template import loader from haystack import connections, constants from 
haystack.backends.solr_backend import SolrSearchBackend class Command(BaseCommand): help = "Generates a Solr schema that reflects the indexes using templates " \ " under a django template dir 'search_configuration/*.xml'. If none are " \ " found, then provides defaults suitable to Solr 6.4" schema_template_loc = 'search_configuration/schema.xml' solrcfg_template_loc = 'search_configuration/solrconfig.xml' def add_arguments(self, parser): parser.add_argument( "-f", "--filename", help='Generate schema.xml directly into a file instead of stdout.' ' Does not render solrconfig.xml' ) parser.add_argument( "-u", "--using", default=constants.DEFAULT_ALIAS, help='Select a specific Solr connection to work with.' ) parser.add_argument( "-c", "--configure-directory", help='Attempt to configure a core located in the given directory' ' by removing the managed-schema.xml(renaming) if it ' ' exists, configuring the core by rendering the schema.xml and ' ' solrconfig.xml templates provided in the django project\'s ' ' TEMPLATE_DIR/search_configuration directories' ) parser.add_argument( "-r", "--reload-core", help='If provided, attempts to automatically reload the solr core' ' via the urls in the "URL" and "ADMIN_URL" settings of the SOLR' ' HAYSTACK_CONNECTIONS entry. Both MUST be set.' 
) def handle(self, **options): """Generates a Solr schema that reflects the indexes.""" using = options.get('using') if not isinstance(connections[using].get_backend(), SolrSearchBackend): raise ImproperlyConfigured("'%s' isn't configured as a SolrEngine" % using) schema_xml = self.build_template(using=using, template_filename=Command.schema_template_loc) solrcfg_xml = self.build_template(using=using, template_filename=Command.solrcfg_template_loc) filename = options.get('filename') configure_directory = options.get('configure_directory') reload_core = options.get('reload_core') if filename: self.stdout.write("Trying to write schema file located at {}".format(filename)) self.write_file(filename, schema_xml) if reload_core: connections[using].get_backend().reload() if configure_directory: self.stdout.write("Trying to configure core located at {}".format(configure_directory)) managed_schema_path = os.path.join(configure_directory, 'managed-schema') if os.path.isfile(managed_schema_path): try: os.rename(managed_schema_path, '%s.old' % managed_schema_path) except (IOError, OSError) as exc: raise CommandError('Could not rename old managed schema file {}: {}'.format(managed_schema_path, exc)) schema_xml_path = os.path.join(configure_directory, 'schema.xml') try: self.write_file(schema_xml_path, schema_xml) except EnvironmentError as exc: raise CommandError('Could not configure {}: {}'.format(schema_xml_path, exc)) solrconfig_path = os.path.join(configure_directory, 'solrconfig.xml') try: self.write_file(solrconfig_path, solrcfg_xml) except EnvironmentError as exc: raise CommandError('Could not write {}: {}'.format(solrconfig_path, exc)) if reload_core: core = settings.HAYSTACK_CONNECTIONS[using]['URL'].rsplit('/', 1)[-1] if 'ADMIN_URL' not in settings.HAYSTACK_CONNECTIONS[using]: raise ImproperlyConfigured("'ADMIN_URL' must be specified in the HAYSTACK_CONNECTIONS" " for the %s backend" % using) if 'URL' not in settings.HAYSTACK_CONNECTIONS[using]: raise 
ImproperlyConfigured("'URL' must be specified in the HAYSTACK_CONNECTIONS" " for the %s backend" % using) try: self.stdout.write("Trying to reload core named {}".format(core)) resp = requests.get(settings.HAYSTACK_CONNECTIONS[using]['ADMIN_URL'], params={'action': 'RELOAD', 'core': core}) if not resp.ok: raise CommandError('Failed to reload core – Solr error: {}'.format(resp)) except CommandError: raise except Exception as exc: raise CommandError('Failed to reload core {}: {}'.format(core, exc)) if not filename and not configure_directory and not reload_core: self.print_stdout(schema_xml) def build_context(self, using): backend = connections[using].get_backend() if not isinstance(backend, SolrSearchBackend): raise ImproperlyConfigured("'%s' isn't configured as a SolrEngine" % backend.connection_alias) content_field_name, fields = backend.build_schema( connections[using].get_unified_index().all_searchfields() ) return { 'content_field_name': content_field_name, 'fields': fields, 'default_operator': constants.DEFAULT_OPERATOR, 'ID': constants.ID, 'DJANGO_CT': constants.DJANGO_CT, 'DJANGO_ID': constants.DJANGO_ID, } def build_template(self, using, template_filename=schema_template_loc): t = loader.get_template(template_filename) c = self.build_context(using=using) return t.render(c) def print_stdout(self, schema_xml): self.stderr.write("\n") self.stderr.write("\n") self.stderr.write("\n") self.stderr.write("Save the following output to 'schema.xml' and place it in your Solr configuration directory.\n") self.stderr.write("--------------------------------------------------------------------------------------------\n") self.stderr.write("\n") self.stdout.write(schema_xml) def write_file(self, filename, schema_xml): with open(filename, 'w') as schema_file: schema_file.write(schema_xml) os.fsync(schema_file.fileno()) django-haystack-2.8.0/haystack/management/commands/clear_index.py000066400000000000000000000042061325051407000251420ustar00rootroot00000000000000# encoding: 
utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.core.management.base import BaseCommand from django.utils import six from haystack import connections class Command(BaseCommand): help = "Clears out the search index completely." def add_arguments(self, parser): parser.add_argument( '--noinput', action='store_false', dest='interactive', default=True, help='If provided, no prompts will be issued to the user and the data will be wiped out.' ) parser.add_argument( "-u", "--using", action="append", default=[], help='Update only the named backend (can be used multiple times). ' 'By default all backends will be updated.' ) parser.add_argument( '--nocommit', action='store_false', dest='commit', default=True, help='Will pass commit=False to the backend.' ) def handle(self, **options): """Clears out the search index completely.""" self.verbosity = int(options.get('verbosity', 1)) self.commit = options.get('commit', True) using = options.get('using') if not using: using = connections.connections_info.keys() if options.get('interactive', True): self.stdout.write("WARNING: This will irreparably remove EVERYTHING from your search index in connection '%s'." % "', '".join(using)) self.stdout.write("Your choices after this are to restore from backups or rebuild via the `rebuild_index` command.") yes_or_no = six.moves.input("Are you sure you wish to continue? 
[y/N] ") if not yes_or_no.lower().startswith('y'): self.stdout.write("No action taken.") return if self.verbosity >= 1: self.stdout.write("Removing all documents from your index because you said so.") for backend_name in using: backend = connections[backend_name].get_backend() backend.clear(commit=self.commit) if self.verbosity >= 1: self.stdout.write("All documents removed.") django-haystack-2.8.0/haystack/management/commands/haystack_info.py000066400000000000000000000014271325051407000255110ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.core.management.base import BaseCommand from haystack import connections class Command(BaseCommand): help = "Provides feedback about the current Haystack setup." def handle(self, **options): """Provides feedback about the current Haystack setup.""" unified_index = connections['default'].get_unified_index() indexed = unified_index.get_indexed_models() index_count = len(indexed) self.stdout.write("Number of handled %s index(es)." % index_count) for index in indexed: self.stdout.write(" - Model: %s by Index: %s" % ( index.__name__, unified_index.get_indexes()[index]) ) django-haystack-2.8.0/haystack/management/commands/rebuild_index.py000066400000000000000000000032731325051407000255050ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.core.management import call_command from django.core.management.base import BaseCommand class Command(BaseCommand): help = "Completely rebuilds the search index by removing the old data and then updating." def add_arguments(self, parser): parser.add_argument( '--noinput', action='store_false', dest='interactive', default=True, help='If provided, no prompts will be issued to the user and the data will be wiped out.' 
) parser.add_argument( '-u', '--using', action='append', default=[], help='Update only the named backend (can be used multiple times). ' 'By default all backends will be updated.' ) parser.add_argument( '-k', '--workers', default=0, type=int, help='Allows for the use multiple workers to parallelize indexing. Requires multiprocessing.' ) parser.add_argument( '--nocommit', action='store_false', dest='commit', default=True, help='Will pass commit=False to the backend.' ) parser.add_argument( '-b', '--batch-size', dest='batchsize', type=int, help='Number of items to index at once.' ) def handle(self, **options): clear_options = options.copy() update_options = options.copy() for key in ('batchsize', 'workers'): del clear_options[key] for key in ('interactive', ): del update_options[key] call_command('clear_index', **clear_options) call_command('update_index', **update_options) django-haystack-2.8.0/haystack/management/commands/update_index.py000077500000000000000000000345051325051407000253460ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import logging import multiprocessing import os import time from datetime import timedelta from django.core.management.base import BaseCommand from django.db import close_old_connections, reset_queries from django.utils.encoding import force_text, smart_bytes from django.utils.timezone import now from haystack import connections as haystack_connections from haystack.exceptions import NotHandled from haystack.query import SearchQuerySet from haystack.utils.app_loading import haystack_get_models, haystack_load_apps DEFAULT_BATCH_SIZE = None DEFAULT_AGE = None DEFAULT_MAX_RETRIES = 5 LOG = multiprocessing.log_to_stderr(level=logging.WARNING) def update_worker(args): if len(args) != 10: LOG.error('update_worker received incorrect arguments: %r', args) raise ValueError('update_worker received incorrect arguments') model, start, end, total, using, start_date, 
end_date, verbosity, commit, max_retries = args # FIXME: confirm that this is still relevant with modern versions of Django: # We need to reset the connections, otherwise the different processes # will try to share the connection, which causes things to blow up. from django.db import connections for alias, info in connections.databases.items(): # We need to also tread lightly with SQLite, because blindly wiping # out connections (via ``... = {}``) destroys in-memory DBs. if 'sqlite3' not in info['ENGINE']: try: close_old_connections() if isinstance(connections._connections, dict): del connections._connections[alias] else: delattr(connections._connections, alias) except KeyError: pass # Request that the connection clear out any transient sessions, file handles, etc. haystack_connections[using].reset_sessions() unified_index = haystack_connections[using].get_unified_index() index = unified_index.get_index(model) backend = haystack_connections[using].get_backend() qs = index.build_queryset(start_date=start_date, end_date=end_date) do_update(backend, index, qs, start, end, total, verbosity, commit, max_retries) return args def do_update(backend, index, qs, start, end, total, verbosity=1, commit=True, max_retries=DEFAULT_MAX_RETRIES, last_max_pk=None): # Get a clone of the QuerySet so that the cache doesn't bloat up # in memory. Useful when reindexing large amounts of data. small_cache_qs = qs.all() # If we got the max seen PK from last batch, use it to restrict the qs # to values above; this optimises the query for Postgres as not to # devolve into multi-second run time at large offsets. 
if last_max_pk is not None: current_qs = small_cache_qs.filter(pk__gt=last_max_pk)[:end - start] else: current_qs = small_cache_qs[start:end] # Remember maximum PK seen so far max_pk = None current_qs = list(current_qs) if current_qs: max_pk = current_qs[-1].pk is_parent_process = hasattr(os, 'getppid') and os.getpid() == os.getppid() if verbosity >= 2: if is_parent_process: print(" indexed %s - %d of %d." % (start + 1, end, total)) else: print(" indexed %s - %d of %d (worker PID: %s)." % (start + 1, end, total, os.getpid())) retries = 0 while retries < max_retries: try: # FIXME: Get the right backend. backend.update(index, current_qs, commit=commit) if verbosity >= 2 and retries: print('Completed indexing {} - {}, tried {}/{} times'.format(start + 1, end, retries + 1, max_retries)) break except Exception as exc: # Catch all exceptions which do not normally trigger a system exit, excluding SystemExit and # KeyboardInterrupt. This avoids needing to import the backend-specific exception subclasses # from pysolr, elasticsearch, whoosh, requests, etc. retries += 1 error_context = {'start': start + 1, 'end': end, 'retries': retries, 'max_retries': max_retries, 'pid': os.getpid(), 'exc': exc} error_msg = 'Failed indexing %(start)s - %(end)s (retry %(retries)s/%(max_retries)s): %(exc)s' if not is_parent_process: error_msg += ' (pid %(pid)s): %(exc)s' if retries >= max_retries: LOG.error(error_msg, error_context, exc_info=True) raise elif verbosity >= 2: LOG.warning(error_msg, error_context, exc_info=True) # If going to try again, sleep a bit before time.sleep(2 ** retries) # Clear out the DB connections queries because it bloats up RAM. reset_queries() return max_pk class Command(BaseCommand): help = "Freshens the index for the given app(s)." def add_arguments(self, parser): parser.add_argument( 'app_label', nargs='*', help='App label of an application to update the search index.' 
) parser.add_argument( '-a', '--age', type=int, default=DEFAULT_AGE, help='Number of hours back to consider objects new.' ) parser.add_argument( '-s', '--start', dest='start_date', help='The start date for indexing. Can be any dateutil-parsable string;' ' YYYY-MM-DDTHH:MM:SS is recommended to avoid confusion' ) parser.add_argument( '-e', '--end', dest='end_date', help='The end date for indexing. Can be any dateutil-parsable string;' ' YYYY-MM-DDTHH:MM:SS is recommended to avoid confusion' ) parser.add_argument( '-b', '--batch-size', dest='batchsize', type=int, help='Number of items to index at once.' ) parser.add_argument( '-r', '--remove', action='store_true', default=False, help='Remove objects from the index that are no longer present in the database.' ) parser.add_argument( '-u', '--using', action='append', default=[], help='Update only the named backend (can be used multiple times). ' 'By default all backends will be updated.' ) parser.add_argument( '-k', '--workers', type=int, default=0, help='Allows for the use multiple workers to parallelize indexing.' ) parser.add_argument( '--nocommit', action='store_false', dest='commit', default=True, help='Will pass commit=False to the backend.' ) parser.add_argument( '-t', '--max-retries', action='store', dest='max_retries', type=int, default=DEFAULT_MAX_RETRIES, help='Maximum number of attempts to write to the backend when an error occurs.' 
) def handle(self, **options): self.verbosity = int(options.get('verbosity', 1)) self.batchsize = options.get('batchsize', DEFAULT_BATCH_SIZE) self.start_date = None self.end_date = None self.remove = options.get('remove', False) self.workers = options.get('workers', 0) self.commit = options.get('commit', True) self.max_retries = options.get('max_retries', DEFAULT_MAX_RETRIES) self.backends = options.get('using') if not self.backends: self.backends = haystack_connections.connections_info.keys() age = options.get('age', DEFAULT_AGE) start_date = options.get('start_date') end_date = options.get('end_date') if self.verbosity > 2: LOG.setLevel(logging.DEBUG) elif self.verbosity > 1: LOG.setLevel(logging.INFO) if age is not None: self.start_date = now() - timedelta(hours=int(age)) if start_date is not None: from dateutil.parser import parse as dateutil_parse try: self.start_date = dateutil_parse(start_date) except ValueError: pass if end_date is not None: from dateutil.parser import parse as dateutil_parse try: self.end_date = dateutil_parse(end_date) except ValueError: pass labels = options.get('app_label') or haystack_load_apps() for label in labels: for using in self.backends: try: self.update_backend(label, using) except: LOG.exception("Error updating %s using %s ", label, using) raise def update_backend(self, label, using): backend = haystack_connections[using].get_backend() unified_index = haystack_connections[using].get_unified_index() for model in haystack_get_models(label): try: index = unified_index.get_index(model) except NotHandled: if self.verbosity >= 2: self.stdout.write("Skipping '%s' - no index." % model) continue if self.workers > 0: # workers resetting connections leads to references to models / connections getting # stale and having their connection disconnected from under them. Resetting before # the loop continues and it accesses the ORM makes it better. 
close_old_connections() qs = index.build_queryset(using=using, start_date=self.start_date, end_date=self.end_date) total = qs.count() if self.verbosity >= 1: self.stdout.write(u"Indexing %d %s" % ( total, force_text(model._meta.verbose_name_plural)) ) batch_size = self.batchsize or backend.batch_size if self.workers > 0: ghetto_queue = [] max_pk = None for start in range(0, total, batch_size): end = min(start + batch_size, total) if self.workers == 0: max_pk = do_update(backend, index, qs, start, end, total, verbosity=self.verbosity, commit=self.commit, max_retries=self.max_retries, last_max_pk=max_pk) else: ghetto_queue.append((model, start, end, total, using, self.start_date, self.end_date, self.verbosity, self.commit, self.max_retries)) if self.workers > 0: pool = multiprocessing.Pool(self.workers) successful_tasks = pool.map(update_worker, ghetto_queue) if len(ghetto_queue) != len(successful_tasks): self.stderr.write('Queued %d tasks but only %d completed' % (len(ghetto_queue), len(successful_tasks))) for i in ghetto_queue: if i not in successful_tasks: self.stderr.write('Incomplete task: %s' % repr(i)) pool.close() pool.join() if self.remove: if self.start_date or self.end_date or total <= 0: # They're using a reduced set, which may not incorporate # all pks. Rebuild the list with everything. qs = index.index_queryset().values_list('pk', flat=True) database_pks = set(smart_bytes(pk) for pk in qs) else: database_pks = set(smart_bytes(pk) for pk in qs.values_list('pk', flat=True)) # Since records may still be in the search index but not the local database # we'll use that to create batches for processing. # See https://github.com/django-haystack/django-haystack/issues/1186 index_total = SearchQuerySet(using=backend.connection_alias).models(model).count() # Retrieve PKs from the index. Note that this cannot be a numeric range query because although # pks are normally numeric they can be non-numeric UUIDs or other custom values. 
To reduce # load on the search engine, we only retrieve the pk field, which will be checked against the # full list obtained from the database, and the id field, which will be used to delete the # record should it be found to be stale. index_pks = SearchQuerySet(using=backend.connection_alias).models(model) index_pks = index_pks.values_list('pk', 'id') # We'll collect all of the record IDs which are no longer present in the database and delete # them after walking the entire index. This uses more memory than the incremental approach but # avoids needing the pagination logic below to account for both commit modes: stale_records = set() for start in range(0, index_total, batch_size): upper_bound = start + batch_size # If the database pk is no longer present, queue the index key for removal: for pk, rec_id in index_pks[start:upper_bound]: if smart_bytes(pk) not in database_pks: stale_records.add(rec_id) if stale_records: if self.verbosity >= 1: self.stdout.write(" removing %d stale records." % len(stale_records)) for rec_id in stale_records: # Since the PK was not in the database list, we'll delete the record from the search # index: if self.verbosity >= 2: self.stdout.write(" removing %s." % rec_id) backend.remove(rec_id, commit=self.commit) django-haystack-2.8.0/haystack/manager.py000066400000000000000000000071111325051407000203600ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from haystack.query import EmptySearchQuerySet, SearchQuerySet class SearchIndexManager(object): def __init__(self, using=None): super(SearchIndexManager, self).__init__() self.using = using def get_search_queryset(self): """Returns a new SearchQuerySet object. Subclasses can override this method to easily customize the behavior of the Manager. 
""" return SearchQuerySet(using=self.using) def get_empty_query_set(self): return EmptySearchQuerySet(using=self.using) def all(self): return self.get_search_queryset() def none(self): return self.get_empty_query_set() def filter(self, *args, **kwargs): return self.get_search_queryset().filter(*args, **kwargs) def exclude(self, *args, **kwargs): return self.get_search_queryset().exclude(*args, **kwargs) def filter_and(self, *args, **kwargs): return self.get_search_queryset().filter_and(*args, **kwargs) def filter_or(self, *args, **kwargs): return self.get_search_queryset().filter_or(*args, **kwargs) def order_by(self, *args): return self.get_search_queryset().order_by(*args) def highlight(self): return self.get_search_queryset().highlight() def boost(self, term, boost): return self.get_search_queryset().boost(term, boost) def facet(self, field): return self.get_search_queryset().facet(field) def within(self, field, point_1, point_2): return self.get_search_queryset().within(field, point_1, point_2) def dwithin(self, field, point, distance): return self.get_search_queryset().dwithin(field, point, distance) def distance(self, field, point): return self.get_search_queryset().distance(field, point) def date_facet(self, field, start_date, end_date, gap_by, gap_amount=1): return self.get_search_queryset().date_facet(field, start_date, end_date, gap_by, gap_amount=1) def query_facet(self, field, query): return self.get_search_queryset().query_facet(field, query) def narrow(self, query): return self.get_search_queryset().narrow(query) def raw_search(self, query_string, **kwargs): return self.get_search_queryset().raw_search(query_string, **kwargs) def load_all(self): return self.get_search_queryset().load_all() def auto_query(self, query_string, fieldname='content'): return self.get_search_queryset().auto_query(query_string, fieldname=fieldname) def autocomplete(self, **kwargs): return self.get_search_queryset().autocomplete(**kwargs) def using(self, connection_name): 
return self.get_search_queryset().using(connection_name) def count(self): return self.get_search_queryset().count() def best_match(self): return self.get_search_queryset().best_match() def latest(self, date_field): return self.get_search_queryset().latest(date_field) def more_like_this(self, model_instance): return self.get_search_queryset().more_like_this(model_instance) def facet_counts(self): return self.get_search_queryset().facet_counts() def spelling_suggestion(self, preferred_query=None): return self.get_search_queryset().spelling_suggestion(preferred_query=None) def values(self, *fields): return self.get_search_queryset().values(*fields) def values_list(self, *fields, **kwargs): return self.get_search_queryset().values_list(*fields, **kwargs) django-haystack-2.8.0/haystack/models.py000066400000000000000000000205611325051407000202350ustar00rootroot00000000000000# encoding: utf-8 # "Hey, Django! Look at me, I'm an app! For Serious!" from __future__ import absolute_import, division, print_function, unicode_literals from django.core.exceptions import ObjectDoesNotExist from django.utils import six from django.utils.encoding import force_text from django.utils.text import capfirst from haystack.exceptions import NotHandled, SpatialError from haystack.utils import log as logging from haystack.utils.app_loading import haystack_get_model try: from geopy import distance as geopy_distance except ImportError: geopy_distance = None # Not a Django model, but tightly tied to them and there doesn't seem to be a # better spot in the tree. class SearchResult(object): """ A single search result. The actual object is loaded lazily by accessing object; until then this object only stores the model, pk, and score. Note that iterating over SearchResults and getting the object for each result will do O(N) database queries, which may not fit your needs for performance. 
""" def __init__(self, app_label, model_name, pk, score, **kwargs): self.app_label, self.model_name = app_label, model_name self.pk = pk self.score = score self._object = None self._model = None self._verbose_name = None self._additional_fields = [] self._point_of_origin = kwargs.pop('_point_of_origin', None) self._distance = kwargs.pop('_distance', None) self.stored_fields = None self.log = self._get_log() for key, value in kwargs.items(): if key not in self.__dict__: self.__dict__[key] = value self._additional_fields.append(key) def _get_log(self): return logging.getLogger('haystack') def __repr__(self): return "" % (self.app_label, self.model_name, self.pk) def __unicode__(self): return force_text(self.__repr__()) def __getattr__(self, attr): if attr == '__getnewargs__': raise AttributeError return self.__dict__.get(attr, None) def _get_searchindex(self): from haystack import connections return connections['default'].get_unified_index().get_index(self.model) searchindex = property(_get_searchindex) def _get_object(self): if self._object is None: if self.model is None: self.log.error("Model could not be found for SearchResult '%s'.", self) return None try: try: self._object = self.searchindex.read_queryset().get(pk=self.pk) except NotHandled: self.log.warning("Model '%s.%s' not handled by the routers.", self.app_label, self.model_name) # Revert to old behaviour self._object = self.model._default_manager.get(pk=self.pk) except ObjectDoesNotExist: self.log.error("Object could not be found in database for SearchResult '%s'.", self) self._object = None return self._object def _set_object(self, obj): self._object = obj object = property(_get_object, _set_object) def _get_model(self): if self._model is None: try: self._model = haystack_get_model(self.app_label, self.model_name) except LookupError: # this changed in change 1.7 to throw an error instead of # returning None when the model isn't found. So catch the # lookup error and keep self._model == None. 
pass return self._model def _set_model(self, obj): self._model = obj model = property(_get_model, _set_model) def _get_distance(self): from haystack.utils.geo import Distance if self._distance is None: # We didn't get it from the backend & we haven't tried calculating # it yet. Check if geopy is available to do it the "slow" way # (even though slow meant 100 distance calculations in 0.004 seconds # in my testing). if geopy_distance is None: raise SpatialError("The backend doesn't have 'DISTANCE_AVAILABLE' enabled & the 'geopy' library could not be imported, so distance information is not available.") if not self._point_of_origin: raise SpatialError("The original point is not available.") if not hasattr(self, self._point_of_origin['field']): raise SpatialError("The field '%s' was not included in search results, so the distance could not be calculated." % self._point_of_origin['field']) po_lng, po_lat = self._point_of_origin['point'].coords location_field = getattr(self, self._point_of_origin['field']) if location_field is None: return None lf_lng, lf_lat = location_field.coords self._distance = Distance(km=geopy_distance.distance((po_lat, po_lng), (lf_lat, lf_lng)).km) # We've either already calculated it or the backend returned it, so # let's use that. 
return self._distance def _set_distance(self, dist): self._distance = dist distance = property(_get_distance, _set_distance) def _get_verbose_name(self): if self.model is None: self.log.error("Model could not be found for SearchResult '%s'.", self) return u'' return force_text(capfirst(self.model._meta.verbose_name)) verbose_name = property(_get_verbose_name) def _get_verbose_name_plural(self): if self.model is None: self.log.error("Model could not be found for SearchResult '%s'.", self) return u'' return force_text(capfirst(self.model._meta.verbose_name_plural)) verbose_name_plural = property(_get_verbose_name_plural) def content_type(self): """Returns the content type for the result's model instance.""" if self.model is None: self.log.error("Model could not be found for SearchResult '%s'.", self) return u'' return six.text_type(self.model._meta) def get_additional_fields(self): """ Returns a dictionary of all of the fields from the raw result. Useful for serializing results. Only returns what was seen from the search engine, so it may have extra fields Haystack's indexes aren't aware of. """ additional_fields = {} for fieldname in self._additional_fields: additional_fields[fieldname] = getattr(self, fieldname) return additional_fields def get_stored_fields(self): """ Returns a dictionary of all of the stored fields from the SearchIndex. Useful for serializing results. Only returns the fields Haystack's indexes are aware of as being 'stored'. """ if self._stored_fields is None: from haystack import connections try: index = connections['default'].get_unified_index().get_index(self.model) except NotHandled: # Not found? Return nothing. return {} self._stored_fields = {} # Iterate through the index's fields, pulling out the fields that # are stored. 
for fieldname, field in index.fields.items(): if field.stored is True: self._stored_fields[fieldname] = getattr(self, fieldname, u'') return self._stored_fields def __getstate__(self): """ Returns a dictionary representing the ``SearchResult`` in order to make it pickleable. """ # The ``log`` is excluded because, under the hood, ``logging`` uses # ``threading.Lock``, which doesn't pickle well. ret_dict = self.__dict__.copy() del(ret_dict['log']) return ret_dict def __setstate__(self, data_dict): """ Updates the object's attributes according to data passed by pickle. """ self.__dict__.update(data_dict) self.log = self._get_log() def reload_indexes(sender, *args, **kwargs): from haystack import connections for conn in connections.all(): ui = conn.get_unified_index() # Note: Unlike above, we're resetting the ``UnifiedIndex`` here. # Thi gives us a clean slate. ui.reset() django-haystack-2.8.0/haystack/panels.py000066400000000000000000000054461325051407000202410ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime from debug_toolbar.panels import DebugPanel from django.template.loader import render_to_string from django.utils import six from django.utils.translation import ugettext_lazy as _ from haystack import connections class HaystackDebugPanel(DebugPanel): """ Panel that displays information about the Haystack queries run while processing the request. 
""" name = 'Haystack' has_content = True def __init__(self, *args, **kwargs): super(self.__class__, self).__init__(*args, **kwargs) self._offset = dict((alias, len(connections[alias].queries)) for alias in connections.connections_info.keys()) self._search_time = 0 self._queries = [] self._backends = {} def nav_title(self): return _('Haystack') def nav_subtitle(self): self._queries = [] self._backends = {} for alias in connections.connections_info.keys(): search_queries = connections[alias].queries[self._offset[alias]:] self._backends[alias] = { 'time_spent': sum(float(q['time']) for q in search_queries), 'queries': len(search_queries), } self._queries.extend([(alias, q) for q in search_queries]) self._queries.sort(key=lambda x: x[1]['start']) self._search_time = sum([d['time_spent'] for d in self._backends.itervalues()]) num_queries = len(self._queries) return "%d %s in %.2fms" % ( num_queries, (num_queries == 1) and 'query' or 'queries', self._search_time ) def title(self): return _('Search Queries') def url(self): return '' def content(self): width_ratio_tally = 0 for alias, query in self._queries: query['alias'] = alias query['query'] = query['query_string'] if query.get('additional_kwargs'): if query['additional_kwargs'].get('result_class'): query['additional_kwargs']['result_class'] = six.text_type(query['additional_kwargs']['result_class']) try: query['width_ratio'] = (float(query['time']) / self._search_time) * 100 except ZeroDivisionError: query['width_ratio'] = 0 query['start_offset'] = width_ratio_tally width_ratio_tally += query['width_ratio'] context = self.context.copy() context.update({ 'backends': sorted(self._backends.items(), key=lambda x: -x[1]['time_spent']), 'queries': [q for a, q in self._queries], 'sql_time': self._search_time, }) return render_to_string('panels/haystack.html', context) django-haystack-2.8.0/haystack/query.py000066400000000000000000000633541325051407000201260ustar00rootroot00000000000000# encoding: utf-8 from __future__ import 
absolute_import, division, print_function, unicode_literals import operator import warnings from django.utils import six from haystack import connection_router, connections from haystack.backends import SQ from haystack.constants import DEFAULT_OPERATOR, ITERATOR_LOAD_PER_QUERY from haystack.exceptions import NotHandled from haystack.inputs import AutoQuery, Raw from haystack.utils import log as logging class SearchQuerySet(object): """ Provides a way to specify search parameters and lazily load results. Supports chaining (a la QuerySet) to narrow the search. """ def __init__(self, using=None, query=None): # ``_using`` should only ever be a value other than ``None`` if it's # been forced with the ``.using`` method. self._using = using self.query = None self._determine_backend() # If ``query`` is present, it should override even what the routers # think. if query is not None: self.query = query self._result_cache = [] self._result_count = None self._cache_full = False self._load_all = False self._ignored_result_count = 0 self.log = logging.getLogger('haystack') def _determine_backend(self): # A backend has been manually selected. Use it instead. if self._using is not None: self.query = connections[self._using].get_query() return # No backend, so rely on the routers to figure out what's right. hints = {} if self.query: hints['models'] = self.query.models backend_alias = connection_router.for_read(**hints) # The ``SearchQuery`` might swap itself out for a different variant # here. if self.query: self.query = self.query.using(backend_alias) else: self.query = connections[backend_alias].get_query() def __getstate__(self): """ For pickling. """ len(self) obj_dict = self.__dict__.copy() obj_dict['_iter'] = None obj_dict['log'] = None return obj_dict def __setstate__(self, data_dict): """ For unpickling. 
""" self.__dict__ = data_dict self.log = logging.getLogger('haystack') def __repr__(self): return u"" % (self.query, self._using) def __len__(self): if self._result_count is None: self._result_count = self.query.get_count() # Some backends give weird, false-y values here. Convert to zero. if not self._result_count: self._result_count = 0 # This needs to return the actual number of hits, not what's in the cache. return self._result_count - self._ignored_result_count def __iter__(self): if self._cache_is_full(): # We've got a fully populated cache. Let Python do the hard work. return iter(self._result_cache) return self._manual_iter() def __and__(self, other): if isinstance(other, EmptySearchQuerySet): return other._clone() combined = self._clone() combined.query.combine(other.query, SQ.AND) return combined def __or__(self, other): combined = self._clone() if isinstance(other, EmptySearchQuerySet): return combined combined.query.combine(other.query, SQ.OR) return combined def _cache_is_full(self): if not self.query.has_run(): return False if len(self) <= 0: return True try: self._result_cache.index(None) return False except ValueError: # No ``None``s found in the results. Check the length of the cache. return len(self._result_cache) > 0 def _manual_iter(self): # If we're here, our cache isn't fully populated. # For efficiency, fill the cache as we go if we run out of results. # Also, this can't be part of the __iter__ method due to Python's rules # about generator functions. current_position = 0 current_cache_max = 0 while True: if len(self._result_cache) > 0: try: current_cache_max = self._result_cache.index(None) except ValueError: current_cache_max = len(self._result_cache) while current_position < current_cache_max: yield self._result_cache[current_position] current_position += 1 if self._cache_is_full(): return # We've run out of results and haven't hit our limit. # Fill more of the cache. 
if not self._fill_cache(current_position, current_position + ITERATOR_LOAD_PER_QUERY): return def post_process_results(self, results): to_cache = [] # Check if we wish to load all objects. if self._load_all: models_pks = {} loaded_objects = {} # Remember the search position for each result so we don't have to resort later. for result in results: models_pks.setdefault(result.model, []).append(result.pk) # Load the objects for each model in turn. for model in models_pks: loaded_objects[model] = self._load_model_objects(model, models_pks[model]) for result in results: if self._load_all: model_objects = loaded_objects.get(result.model, {}) # Try to coerce a primary key object that matches the models pk # We have to deal with semi-arbitrary keys being cast from strings (UUID, int, etc) if model_objects: result_klass = type(next(iter(model_objects))) result.pk = result_klass(result.pk) try: result._object = model_objects[result.pk] except KeyError: # The object was either deleted since we indexed or should # be ignored for other reasons such as an overriden 'load_all_queryset'; # fail silently. 
self._ignored_result_count += 1 # avoid an unfilled None at the end of the result cache self._result_cache.pop() continue else: # No objects were returned -- possible due to SQS nesting such as # XYZ.objects.filter(id__gt=10) where the amount ignored are # exactly equal to the ITERATOR_LOAD_PER_QUERY del self._result_cache[:len(results)] self._ignored_result_count += len(results) break to_cache.append(result) return to_cache def _load_model_objects(self, model, pks): try: ui = connections[self.query._using].get_unified_index() index = ui.get_index(model) objects = index.read_queryset(using=self.query._using) return objects.in_bulk(pks) except NotHandled: self.log.warning("Model '%s' not handled by the routers.", model) # Revert to old behaviour return model._default_manager.in_bulk(pks) def _fill_cache(self, start, end, **kwargs): # Tell the query where to start from and how many we'd like. self.query._reset() if start is None: start = 0 query_start = start query_start += self._ignored_result_count query_end = end if query_end is not None: query_end += self._ignored_result_count self.query.set_limits(query_start, query_end) results = self.query.get_results(**kwargs) if results is None or len(results) == 0: # trim missing stuff from the result cache self._result_cache = self._result_cache[:start] return False # Setup the full cache now that we know how many results there are. # We need the ``None``s as placeholders to know what parts of the # cache we have/haven't filled. # Using ``None`` like this takes up very little memory. In testing, # an array of 100,000 ``None``s consumed less than .5 Mb, which ought # to be an acceptable loss for consistent and more efficient caching. if len(self._result_cache) == 0: self._result_cache = [None] * self.query.get_count() fill_start, fill_end = start, end if fill_end is None: fill_end = self.query.get_count() cache_start = fill_start while True: to_cache = self.post_process_results(results) # Assign by slice. 
self._result_cache[cache_start:cache_start + len(to_cache)] = to_cache if None in self._result_cache[start:end]: fill_start = fill_end fill_end += ITERATOR_LOAD_PER_QUERY cache_start += len(to_cache) # Tell the query where to start from and how many we'd like. self.query._reset() self.query.set_limits(fill_start, fill_end) results = self.query.get_results() if results is None or len(results) == 0: # No more results. Trim missing stuff from the result cache self._result_cache = self._result_cache[:cache_start] break else: break return True def __getitem__(self, k): """ Retrieves an item or slice from the set of results. """ if not isinstance(k, (slice, six.integer_types)): raise TypeError assert ((not isinstance(k, slice) and (k >= 0)) or (isinstance(k, slice) and (k.start is None or k.start >= 0) and (k.stop is None or k.stop >= 0))), \ "Negative indexing is not supported." # Remember if it's a slice or not. We're going to treat everything as # a slice to simply the logic and will `.pop()` at the end as needed. if isinstance(k, slice): is_slice = True start = k.start if k.stop is not None: bound = int(k.stop) else: bound = None else: is_slice = False start = k bound = k + 1 # We need check to see if we need to populate more of the cache. if len(self._result_cache) <= 0 or (None in self._result_cache[start:bound] and not self._cache_is_full()): try: self._fill_cache(start, bound) except StopIteration: # There's nothing left, even though the bound is higher. pass # Cache should be full enough for our needs. if is_slice: return self._result_cache[start:bound] else: return self._result_cache[start] # Methods that return a SearchQuerySet. 
def all(self): """Returns all results for the query.""" return self._clone() def none(self): """Returns an empty result list for the query.""" return self._clone(klass=EmptySearchQuerySet) def filter(self, *args, **kwargs): """Narrows the search based on certain attributes and the default operator.""" if DEFAULT_OPERATOR == 'OR': return self.filter_or(*args, **kwargs) else: return self.filter_and(*args, **kwargs) def exclude(self, *args, **kwargs): """Narrows the search by ensuring certain attributes are not included.""" clone = self._clone() clone.query.add_filter(~SQ(*args, **kwargs)) return clone def filter_and(self, *args, **kwargs): """Narrows the search by looking for (and including) certain attributes.""" clone = self._clone() clone.query.add_filter(SQ(*args, **kwargs)) return clone def filter_or(self, *args, **kwargs): """Narrows the search by ensuring certain attributes are not included.""" clone = self._clone() clone.query.add_filter(SQ(*args, **kwargs), use_or=True) return clone def order_by(self, *args): """Alters the order in which the results should appear.""" clone = self._clone() for field in args: clone.query.add_order_by(field) return clone def highlight(self, **kwargs): """Adds highlighting to the results.""" clone = self._clone() clone.query.add_highlight(**kwargs) return clone def models(self, *models): """Accepts an arbitrary number of Model classes to include in the search.""" clone = self._clone() for model in models: if model not in connections[self.query._using].get_unified_index().get_indexed_models(): warnings.warn('The model %r is not registered for search.' % (model,)) clone.query.add_model(model) return clone def result_class(self, klass): """ Allows specifying a different class to use for results. Overrides any previous usages. If ``None`` is provided, Haystack will revert back to the default ``SearchResult`` object. 
""" clone = self._clone() clone.query.set_result_class(klass) return clone def boost(self, term, boost): """Boosts a certain aspect of the query.""" clone = self._clone() clone.query.add_boost(term, boost) return clone def facet(self, field, **options): """Adds faceting to a query for the provided field.""" clone = self._clone() clone.query.add_field_facet(field, **options) return clone def within(self, field, point_1, point_2): """Spatial: Adds a bounding box search to the query.""" clone = self._clone() clone.query.add_within(field, point_1, point_2) return clone def dwithin(self, field, point, distance): """Spatial: Adds a distance-based search to the query.""" clone = self._clone() clone.query.add_dwithin(field, point, distance) return clone def stats(self, field): """Adds stats to a query for the provided field.""" return self.stats_facet(field, facet_fields=None) def stats_facet(self, field, facet_fields=None): """Adds stats facet for the given field and facet_fields represents the faceted fields.""" clone = self._clone() stats_facets = [] try: stats_facets.append(sum(facet_fields, [])) except TypeError: if facet_fields: stats_facets.append(facet_fields) clone.query.add_stats_query(field, stats_facets) return clone def distance(self, field, point): """ Spatial: Denotes results must have distance measurements from the provided point. 
""" clone = self._clone() clone.query.add_distance(field, point) return clone def date_facet(self, field, start_date, end_date, gap_by, gap_amount=1): """Adds faceting to a query for the provided field by date.""" clone = self._clone() clone.query.add_date_facet(field, start_date, end_date, gap_by, gap_amount=gap_amount) return clone def query_facet(self, field, query): """Adds faceting to a query for the provided field with a custom query.""" clone = self._clone() clone.query.add_query_facet(field, query) return clone def narrow(self, query): """Pushes existing facet choices into the search.""" if isinstance(query, SQ): # produce query string using empty query of the same class empty_query = self.query._clone() empty_query._reset() query = query.as_query_string(empty_query.build_query_fragment) clone = self._clone() clone.query.add_narrow_query(query) return clone def raw_search(self, query_string, **kwargs): """Passes a raw query directly to the backend.""" return self.filter(content=Raw(query_string, **kwargs)) def load_all(self): """Efficiently populates the objects in the search results.""" clone = self._clone() clone._load_all = True return clone def auto_query(self, query_string, fieldname='content'): """ Performs a best guess constructing the search query. This method is somewhat naive but works well enough for the simple, common cases. """ kwargs = { fieldname: AutoQuery(query_string) } return self.filter(**kwargs) def autocomplete(self, **kwargs): """ A shortcut method to perform an autocomplete search. Must be run against fields that are either ``NgramField`` or ``EdgeNgramField``. 
""" clone = self._clone() query_bits = [] for field_name, query in kwargs.items(): for word in query.split(' '): bit = clone.query.clean(word.strip()) if bit: kwargs = { field_name: bit, } query_bits.append(SQ(**kwargs)) return clone.filter(six.moves.reduce(operator.__and__, query_bits)) def using(self, connection_name): """ Allows switching which connection the ``SearchQuerySet`` uses to search in. """ clone = self._clone() clone.query = self.query.using(connection_name) clone._using = connection_name return clone # Methods that do not return a SearchQuerySet. def count(self): """Returns the total number of matching results.""" return len(self) def best_match(self): """Returns the best/top search result that matches the query.""" return self[0] def latest(self, date_field): """Returns the most recent search result that matches the query.""" clone = self._clone() clone.query.clear_order_by() clone.query.add_order_by("-%s" % date_field) return clone.best_match() def more_like_this(self, model_instance): """Finds similar results to the object passed in.""" clone = self._clone() clone.query.more_like_this(model_instance) return clone def facet_counts(self): """ Returns the facet counts found by the query. This will cause the query to execute and should generally be used when presenting the data. """ if self.query.has_run(): return self.query.get_facet_counts() else: clone = self._clone() return clone.query.get_facet_counts() def stats_results(self): """ Returns the stats results found by the query. """ if self.query.has_run(): return self.query.get_stats() else: clone = self._clone() return clone.query.get_stats() def set_spelling_query(self, spelling_query): """Set the exact text to be used to generate spelling suggestions When making complicated queries, such as the alt parser mechanism used by Solr dismax/edismax, this provides a convenient way to set the a simple text string which will be used to generate spelling suggestions without including unnecessary syntax. 
""" clone = self._clone() clone.query.set_spelling_query(spelling_query) return clone def spelling_suggestion(self, preferred_query=None): """ Returns the spelling suggestion found by the query. To work, you must set ``INCLUDE_SPELLING`` within your connection's settings dictionary to ``True``. Otherwise, ``None`` will be returned. This will cause the query to execute and should generally be used when presenting the data. """ if self.query.has_run(): return self.query.get_spelling_suggestion(preferred_query) else: clone = self._clone() return clone.query.get_spelling_suggestion(preferred_query) def values(self, *fields): """ Returns a list of dictionaries, each containing the key/value pairs for the result, exactly like Django's ``ValuesQuerySet``. """ qs = self._clone(klass=ValuesSearchQuerySet) qs._fields.extend(fields) return qs def values_list(self, *fields, **kwargs): """ Returns a list of field values as tuples, exactly like Django's ``QuerySet.values``. Optionally accepts a ``flat=True`` kwarg, which in the case of a single field being provided, will return a flat list of that field rather than a list of tuples. """ flat = kwargs.pop("flat", False) if flat and len(fields) > 1: raise TypeError("'flat' is not valid when values_list is called with more than one field.") qs = self._clone(klass=ValuesListSearchQuerySet) qs._fields.extend(fields) qs._flat = flat return qs # Utility methods. def _clone(self, klass=None): if klass is None: klass = self.__class__ query = self.query._clone() clone = klass(query=query) clone._load_all = self._load_all return clone class EmptySearchQuerySet(SearchQuerySet): """ A stubbed SearchQuerySet that behaves as normal but always returns no results. """ def __len__(self): return 0 def _cache_is_full(self): # Pretend the cache is always full with no results. 
return True def _clone(self, klass=None): clone = super(EmptySearchQuerySet, self)._clone(klass=klass) clone._result_cache = [] return clone def _fill_cache(self, start, end): return False def facet_counts(self): return {} class ValuesListSearchQuerySet(SearchQuerySet): """ A ``SearchQuerySet`` which returns a list of field values as tuples, exactly like Django's ``ValuesListQuerySet``. """ def __init__(self, *args, **kwargs): super(ValuesListSearchQuerySet, self).__init__(*args, **kwargs) self._flat = False self._fields = [] # Removing this dependency would require refactoring much of the backend # code (_process_results, etc.) and these aren't large enough to make it # an immediate priority: self._internal_fields = ['id', 'django_ct', 'django_id', 'score'] def _clone(self, klass=None): clone = super(ValuesListSearchQuerySet, self)._clone(klass=klass) clone._fields = self._fields clone._flat = self._flat return clone def _fill_cache(self, start, end): query_fields = set(self._internal_fields) query_fields.update(self._fields) kwargs = { 'fields': query_fields } return super(ValuesListSearchQuerySet, self)._fill_cache(start, end, **kwargs) def post_process_results(self, results): to_cache = [] if self._flat: accum = to_cache.extend else: accum = to_cache.append for result in results: accum([getattr(result, i, None) for i in self._fields]) return to_cache class ValuesSearchQuerySet(ValuesListSearchQuerySet): """ A ``SearchQuerySet`` which returns a list of dictionaries, each containing the key/value pairs for the result, exactly like Django's ``ValuesQuerySet``. 
""" def _fill_cache(self, start, end): query_fields = set(self._internal_fields) query_fields.update(self._fields) kwargs = { 'fields': query_fields } return super(ValuesListSearchQuerySet, self)._fill_cache(start, end, **kwargs) def post_process_results(self, results): to_cache = [] for result in results: to_cache.append(dict((i, getattr(result, i, None)) for i in self._fields)) return to_cache class RelatedSearchQuerySet(SearchQuerySet): """ A variant of the SearchQuerySet that can handle `load_all_queryset`s. """ def __init__(self, *args, **kwargs): super(RelatedSearchQuerySet, self).__init__(*args, **kwargs) self._load_all_querysets = {} self._result_cache = [] def _load_model_objects(self, model, pks): if model in self._load_all_querysets: # Use the overriding queryset. return self._load_all_querysets[model].in_bulk(pks) else: # Check the SearchIndex for the model for an override. try: ui = connections[self.query._using].get_unified_index() index = ui.get_index(model) qs = index.load_all_queryset() return qs.in_bulk(pks) except NotHandled: # The model returned doesn't seem to be handled by the # routers. We should silently fail and populate # nothing for those objects. return {} def load_all_queryset(self, model, queryset): """ Allows for specifying a custom ``QuerySet`` that changes how ``load_all`` will fetch records for the provided model. This is useful for post-processing the results from the query, enabling things like adding ``select_related`` or filtering certain data. 
""" clone = self._clone() clone._load_all_querysets[model] = queryset return clone def _clone(self, klass=None): clone = super(RelatedSearchQuerySet, self)._clone(klass=klass) clone._load_all_querysets = self._load_all_querysets return clone django-haystack-2.8.0/haystack/routers.py000066400000000000000000000005771325051407000204620ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from haystack.constants import DEFAULT_ALIAS class BaseRouter(object): # Reserved for future extension. pass class DefaultRouter(BaseRouter): def for_read(self, **hints): return DEFAULT_ALIAS def for_write(self, **hints): return DEFAULT_ALIAS django-haystack-2.8.0/haystack/signals.py000066400000000000000000000061331325051407000204110ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.db import models from haystack.exceptions import NotHandled class BaseSignalProcessor(object): """ A convenient way to attach Haystack to Django's signals & cause things to index. By default, does nothing with signals but provides underlying functionality. """ def __init__(self, connections, connection_router): self.connections = connections self.connection_router = connection_router self.setup() def setup(self): """ A hook for setting up anything necessary for ``handle_save/handle_delete`` to be executed. Default behavior is to do nothing (``pass``). """ # Do nothing. pass def teardown(self): """ A hook for tearing down anything necessary for ``handle_save/handle_delete`` to no longer be executed. Default behavior is to do nothing (``pass``). """ # Do nothing. pass def handle_save(self, sender, instance, **kwargs): """ Given an individual model instance, determine which backends the update should be sent to & update the object on those backends. 
""" using_backends = self.connection_router.for_write(instance=instance) for using in using_backends: try: index = self.connections[using].get_unified_index().get_index(sender) index.update_object(instance, using=using) except NotHandled: # TODO: Maybe log it or let the exception bubble? pass def handle_delete(self, sender, instance, **kwargs): """ Given an individual model instance, determine which backends the delete should be sent to & delete the object on those backends. """ using_backends = self.connection_router.for_write(instance=instance) for using in using_backends: try: index = self.connections[using].get_unified_index().get_index(sender) index.remove_object(instance, using=using) except NotHandled: # TODO: Maybe log it or let the exception bubble? pass class RealtimeSignalProcessor(BaseSignalProcessor): """ Allows for observing when saves/deletes fire & automatically updates the search engine appropriately. """ def setup(self): # Naive (listen to all model saves). models.signals.post_save.connect(self.handle_save) models.signals.post_delete.connect(self.handle_delete) # Efficient would be going through all backends & collecting all models # being used, then hooking up signals only for those. def teardown(self): # Naive (listen to all model saves). models.signals.post_save.disconnect(self.handle_save) models.signals.post_delete.disconnect(self.handle_delete) # Efficient would be going through all backends & collecting all models # being used, then disconnecting signals only for those. django-haystack-2.8.0/haystack/templates/000077500000000000000000000000001325051407000203725ustar00rootroot00000000000000django-haystack-2.8.0/haystack/templates/panels/000077500000000000000000000000001325051407000216545ustar00rootroot00000000000000django-haystack-2.8.0/haystack/templates/panels/haystack.html000066400000000000000000000025111325051407000243500ustar00rootroot00000000000000{% load i18n %} {% for query in queries %} {% endfor %}
{% trans 'Query' %} {% trans 'Backend Alias' %} {% trans 'Timeline' %} {% trans 'Time' %} (ms) {% trans 'Kwargs' %}
{{ query.query_string|safe }}
{{ query.alias }}   {{ query.time }} {% for key, value in query.additional_kwargs.items %} '{{ key }}': {{ value|stringformat:"r" }}
{% endfor %}
django-haystack-2.8.0/haystack/templates/search_configuration/000077500000000000000000000000001325051407000245665ustar00rootroot00000000000000django-haystack-2.8.0/haystack/templates/search_configuration/schema.xml000066400000000000000000001621271325051407000265610ustar00rootroot00000000000000 {% for field in fields %} {% endfor %} {{ ID }} id django-haystack-2.8.0/haystack/templates/search_configuration/solrconfig.xml000066400000000000000000001601171325051407000274630ustar00rootroot00000000000000 6.5.0 ${solr.data.dir:} ${solr.lock.type:native} ${solr.ulog.dir:} ${solr.ulog.numVersionBuckets:65536} ${solr.autoCommit.maxTime:15000} false ${solr.autoSoftCommit.maxTime:-1} 1024 true 20 200 false explicit 10 default on true 10 5 5 true true 10 5 spellcheck explicit json true explicit {{ content_field_name }} add-unknown-fields-to-the-schema solrpingquery all true ignored_ {{ content_field_name }} text_en default {{ content_field_name }} solr.DirectSolrSpellChecker internal 0.5 2 1 5 4 0.01 default on true 10 5 5 true true 10 5 spellcheck true tvComponent true false terms string elevate.xml explicit elevator 100 70 0.5 [-\w ,/\n\"']{20,200} ]]> ]]> ,, ,, ,, ,, ,]]> ]]> 10 .,!? WORD en US [^\w-\.] 
_ yyyy-MM-dd'T'HH:mm:ss.SSSZ yyyy-MM-dd'T'HH:mm:ss,SSSZ yyyy-MM-dd'T'HH:mm:ss.SSS yyyy-MM-dd'T'HH:mm:ss,SSS yyyy-MM-dd'T'HH:mm:ssZ yyyy-MM-dd'T'HH:mm:ss yyyy-MM-dd'T'HH:mmZ yyyy-MM-dd'T'HH:mm yyyy-MM-dd HH:mm:ss.SSSZ yyyy-MM-dd HH:mm:ss,SSSZ yyyy-MM-dd HH:mm:ss.SSS yyyy-MM-dd HH:mm:ss,SSS yyyy-MM-dd HH:mm:ssZ yyyy-MM-dd HH:mm:ss yyyy-MM-dd HH:mmZ yyyy-MM-dd HH:mm yyyy-MM-dd text/plain; charset=UTF-8 ${velocity.template.base.dir:} ${velocity.solr.resource.loader.enabled:true} ${velocity.params.resource.loader.enabled:false} 5 django-haystack-2.8.0/haystack/templatetags/000077500000000000000000000000001325051407000210665ustar00rootroot00000000000000django-haystack-2.8.0/haystack/templatetags/__init__.py000066400000000000000000000000001325051407000231650ustar00rootroot00000000000000django-haystack-2.8.0/haystack/templatetags/highlight.py000066400000000000000000000102101325051407000234010ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django import template from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.utils import six from haystack.utils import importlib register = template.Library() class HighlightNode(template.Node): def __init__(self, text_block, query, html_tag=None, css_class=None, max_length=None): self.text_block = template.Variable(text_block) self.query = template.Variable(query) self.html_tag = html_tag self.css_class = css_class self.max_length = max_length if html_tag is not None: self.html_tag = template.Variable(html_tag) if css_class is not None: self.css_class = template.Variable(css_class) if max_length is not None: self.max_length = template.Variable(max_length) def render(self, context): text_block = self.text_block.resolve(context) query = self.query.resolve(context) kwargs = {} if self.html_tag is not None: kwargs['html_tag'] = self.html_tag.resolve(context) if self.css_class is not None: kwargs['css_class'] 
= self.css_class.resolve(context) if self.max_length is not None: kwargs['max_length'] = self.max_length.resolve(context) # Handle a user-defined highlighting function. if hasattr(settings, 'HAYSTACK_CUSTOM_HIGHLIGHTER') and settings.HAYSTACK_CUSTOM_HIGHLIGHTER: # Do the import dance. try: path_bits = settings.HAYSTACK_CUSTOM_HIGHLIGHTER.split('.') highlighter_path, highlighter_classname = '.'.join(path_bits[:-1]), path_bits[-1] highlighter_module = importlib.import_module(highlighter_path) highlighter_class = getattr(highlighter_module, highlighter_classname) except (ImportError, AttributeError) as e: raise ImproperlyConfigured("The highlighter '%s' could not be imported: %s" % (settings.HAYSTACK_CUSTOM_HIGHLIGHTER, e)) else: from haystack.utils.highlighting import Highlighter highlighter_class = Highlighter highlighter = highlighter_class(query, **kwargs) highlighted_text = highlighter.highlight(text_block) return highlighted_text @register.tag def highlight(parser, token): """ Takes a block of text and highlights words from a provided query within that block of text. Optionally accepts arguments to provide the HTML tag to wrap highlighted word in, a CSS class to use with the tag and a maximum length of the blurb in characters. Syntax:: {% highlight with [css_class "class_name"] [html_tag "span"] [max_length 200] %} Example:: # Highlight summary with default behavior. {% highlight result.summary with request.query %} # Highlight summary but wrap highlighted words with a div and the # following CSS class. {% highlight result.summary with request.query html_tag "div" css_class "highlight_me_please" %} # Highlight summary but only show 40 characters. {% highlight result.summary with request.query max_length 40 %} """ bits = token.split_contents() tag_name = bits[0] if not len(bits) % 2 == 0: raise template.TemplateSyntaxError(u"'%s' tag requires valid pairings arguments." 
% tag_name) text_block = bits[1] if len(bits) < 4: raise template.TemplateSyntaxError(u"'%s' tag requires an object and a query provided by 'with'." % tag_name) if bits[2] != 'with': raise template.TemplateSyntaxError(u"'%s' tag's second argument should be 'with'." % tag_name) query = bits[3] arg_bits = iter(bits[4:]) kwargs = {} for bit in arg_bits: if bit == 'css_class': kwargs['css_class'] = six.next(arg_bits) if bit == 'html_tag': kwargs['html_tag'] = six.next(arg_bits) if bit == 'max_length': kwargs['max_length'] = six.next(arg_bits) return HighlightNode(text_block, query, **kwargs) django-haystack-2.8.0/haystack/templatetags/more_like_this.py000066400000000000000000000062671325051407000244500ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django import template from haystack.utils.app_loading import haystack_get_model from haystack.query import SearchQuerySet register = template.Library() class MoreLikeThisNode(template.Node): def __init__(self, model, varname, for_types=None, limit=None): self.model = template.Variable(model) self.varname = varname self.for_types = for_types self.limit = limit if not self.limit is None: self.limit = int(self.limit) def render(self, context): try: model_instance = self.model.resolve(context) sqs = SearchQuerySet() if not self.for_types is None: intermediate = template.Variable(self.for_types) for_types = intermediate.resolve(context).split(',') search_models = [] for model in for_types: model_class = haystack_get_model(*model.split('.')) if model_class: search_models.append(model_class) sqs = sqs.models(*search_models) sqs = sqs.more_like_this(model_instance) if not self.limit is None: sqs = sqs[:self.limit] context[self.varname] = sqs except: pass return '' @register.tag def more_like_this(parser, token): """ Fetches similar items from the search index to find content that is similar to the provided model's content. 
Syntax:: {% more_like_this model_instance as varname [for app_label.model_name,app_label.model_name,...] [limit n] %} Example:: # Pull a full SearchQuerySet (lazy loaded) of similar content. {% more_like_this entry as related_content %} # Pull just the top 5 similar pieces of content. {% more_like_this entry as related_content limit 5 %} # Pull just the top 5 similar entries or comments. {% more_like_this entry as related_content for "blog.entry,comments.comment" limit 5 %} """ bits = token.split_contents() if not len(bits) in (4, 6, 8): raise template.TemplateSyntaxError(u"'%s' tag requires either 3, 5 or 7 arguments." % bits[0]) model = bits[1] if bits[2] != 'as': raise template.TemplateSyntaxError(u"'%s' tag's second argument should be 'as'." % bits[0]) varname = bits[3] limit = None for_types = None if len(bits) == 6: if bits[4] != 'limit' and bits[4] != 'for': raise template.TemplateSyntaxError(u"'%s' tag's fourth argument should be either 'limit' or 'for'." % bits[0]) if bits[4] == 'limit': limit = bits[5] else: for_types = bits[5] if len(bits) == 8: if bits[4] != 'for': raise template.TemplateSyntaxError(u"'%s' tag's fourth argument should be 'for'." % bits[0]) for_types = bits[5] if bits[6] != 'limit': raise template.TemplateSyntaxError(u"'%s' tag's sixth argument should be 'limit'." 
% bits[0]) limit = bits[7] return MoreLikeThisNode(model, varname, for_types, limit) django-haystack-2.8.0/haystack/urls.py000066400000000000000000000003701325051407000177330ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf.urls import url from haystack.views import SearchView urlpatterns = [ url(r'^$', SearchView(), name='haystack_search'), ] django-haystack-2.8.0/haystack/utils/000077500000000000000000000000001325051407000175345ustar00rootroot00000000000000django-haystack-2.8.0/haystack/utils/__init__.py000066400000000000000000000047511325051407000216540ustar00rootroot00000000000000# encoding: utf-8 from __future__ import unicode_literals import importlib import re from django.conf import settings from django.utils import six from haystack.constants import ID, DJANGO_CT, DJANGO_ID from haystack.utils.highlighting import Highlighter IDENTIFIER_REGEX = re.compile('^[\w\d_]+\.[\w\d_]+\.[\w\d-]+$') def default_get_identifier(obj_or_string): """ Get an unique identifier for the object or a string representing the object. If not overridden, uses ... """ if isinstance(obj_or_string, six.string_types): if not IDENTIFIER_REGEX.match(obj_or_string): raise AttributeError(u"Provided string '%s' is not a valid identifier." % obj_or_string) return obj_or_string return u"%s.%s" % (get_model_ct(obj_or_string), obj_or_string._get_pk_val()) def _lookup_identifier_method(): """ If the user has set HAYSTACK_IDENTIFIER_METHOD, import it and return the method uncalled. If HAYSTACK_IDENTIFIER_METHOD is not defined, return haystack.utils.default_get_identifier. This always runs at module import time. We keep the code in a function so that it can be called from unit tests, in order to simulate the re-loading of this module. 
""" if not hasattr(settings, 'HAYSTACK_IDENTIFIER_METHOD'): return default_get_identifier module_path, method_name = settings.HAYSTACK_IDENTIFIER_METHOD.rsplit(".", 1) try: module = importlib.import_module(module_path) except ImportError: raise ImportError(u"Unable to import module '%s' provided for HAYSTACK_IDENTIFIER_METHOD." % module_path) identifier_method = getattr(module, method_name, None) if not identifier_method: raise AttributeError( u"Provided method '%s' for HAYSTACK_IDENTIFIER_METHOD does not exist in '%s'." % (method_name, module_path) ) return identifier_method get_identifier = _lookup_identifier_method() def get_model_ct_tuple(model): # Deferred models should be identified as if they were the underlying model. model_name = model._meta.concrete_model._meta.model_name \ if hasattr(model, '_deferred') and model._deferred else model._meta.model_name return (model._meta.app_label, model_name) def get_model_ct(model): return "%s.%s" % get_model_ct_tuple(model) def get_facet_field_name(fieldname): if fieldname in [ID, DJANGO_ID, DJANGO_CT]: return fieldname return "%s_exact" % fieldname django-haystack-2.8.0/haystack/utils/app_loading.py000077500000000000000000000021511325051407000223650ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.apps import apps from django.core.exceptions import ImproperlyConfigured __all__ = ['haystack_get_models', 'haystack_load_apps'] APP = 'app' MODEL = 'model' def haystack_get_app_modules(): """Return the Python module for each installed app""" return [i.module for i in apps.get_app_configs()] def haystack_load_apps(): """Return a list of app labels for all installed applications which have models""" return [i.label for i in apps.get_app_configs() if i.models_module is not None] def haystack_get_models(label): try: app_mod = apps.get_app_config(label) return app_mod.get_models() except LookupError: if '.' 
not in label: raise ImproperlyConfigured('Unknown application label {}'.format(label)) app_label, model_name = label.rsplit('.', 1) return [apps.get_model(app_label, model_name)] except ImproperlyConfigured: pass def haystack_get_model(app_label, model_name): return apps.get_model(app_label, model_name) django-haystack-2.8.0/haystack/utils/geo.py000066400000000000000000000042761325051407000206710ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.contrib.gis.geos import Point from django.contrib.gis.measure import D, Distance from haystack.constants import WGS_84_SRID from haystack.exceptions import SpatialError def ensure_geometry(geom): """ Makes sure the parameter passed in looks like a GEOS ``GEOSGeometry``. """ if not hasattr(geom, 'geom_type'): raise SpatialError("Point '%s' doesn't appear to be a GEOS geometry." % geom) return geom def ensure_point(geom): """ Makes sure the parameter passed in looks like a GEOS ``Point``. """ ensure_geometry(geom) if geom.geom_type != 'Point': raise SpatialError("Provided geometry '%s' is not a 'Point'." % geom) return geom def ensure_wgs84(point): """ Ensures the point passed in is a GEOS ``Point`` & returns that point's data is in the WGS-84 spatial reference. """ ensure_point(point) # Clone it so we don't alter the original, in case they're using it for # something else. new_point = point.clone() if not new_point.srid: # It has no spatial reference id. Assume WGS-84. new_point.srid = WGS_84_SRID elif new_point.srid != WGS_84_SRID: # Transform it to get to the right system. new_point.transform(WGS_84_SRID) return new_point def ensure_distance(dist): """ Makes sure the parameter passed in is a 'Distance' object. """ try: # Since we mostly only care about the ``.km`` attribute, make sure # it's there. km = dist.km except AttributeError: raise SpatialError("'%s' does not appear to be a 'Distance' object." 
% dist) return dist def generate_bounding_box(bottom_left, top_right): """ Takes two opposite corners of a bounding box (order matters!) & generates a two-tuple of the correct coordinates for the bounding box. The two-tuple is in the form ``((min_lat, min_lng), (max_lat, max_lng))``. """ west, lat_1 = bottom_left.coords east, lat_2 = top_right.coords min_lat, max_lat = min(lat_1, lat_2), max(lat_1, lat_2) return ((min_lat, west), (max_lat, east)) django-haystack-2.8.0/haystack/utils/highlighting.py000066400000000000000000000131011325051407000225470ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.utils.html import strip_tags class Highlighter(object): css_class = 'highlighted' html_tag = 'span' max_length = 200 text_block = '' def __init__(self, query, **kwargs): self.query = query if 'max_length' in kwargs: self.max_length = int(kwargs['max_length']) if 'html_tag' in kwargs: self.html_tag = kwargs['html_tag'] if 'css_class' in kwargs: self.css_class = kwargs['css_class'] self.query_words = set([word.lower() for word in self.query.split() if not word.startswith('-')]) def highlight(self, text_block): self.text_block = strip_tags(text_block) highlight_locations = self.find_highlightable_words() start_offset, end_offset = self.find_window(highlight_locations) return self.render_html(highlight_locations, start_offset, end_offset) def find_highlightable_words(self): # Use a set so we only do this once per unique word. word_positions = {} # Pre-compute the length. end_offset = len(self.text_block) lower_text_block = self.text_block.lower() for word in self.query_words: if not word in word_positions: word_positions[word] = [] start_offset = 0 while start_offset < end_offset: next_offset = lower_text_block.find(word, start_offset, end_offset) # If we get a -1 out of find, it wasn't found. Bomb out and # start the next word. 
if next_offset == -1: break word_positions[word].append(next_offset) start_offset = next_offset + len(word) return word_positions def find_window(self, highlight_locations): best_start = 0 best_end = self.max_length # First, make sure we have words. if not len(highlight_locations): return (best_start, best_end) words_found = [] # Next, make sure we found any words at all. for word, offset_list in highlight_locations.items(): if len(offset_list): # Add all of the locations to the list. words_found.extend(offset_list) if not len(words_found): return (best_start, best_end) if len(words_found) == 1: return (words_found[0], words_found[0] + self.max_length) # Sort the list so it's in ascending order. words_found = sorted(words_found) # We now have a denormalized list of all positions were a word was # found. We'll iterate through and find the densest window we can by # counting the number of found offsets (-1 to fit in the window). highest_density = 0 if words_found[:-1][0] > self.max_length: best_start = words_found[:-1][0] best_end = best_start + self.max_length for count, start in enumerate(words_found[:-1]): current_density = 1 for end in words_found[count + 1:]: if end - start < self.max_length: current_density += 1 else: current_density = 0 # Only replace if we have a bigger (not equal density) so we # give deference to windows earlier in the document. if current_density > highest_density: best_start = start best_end = start + self.max_length highest_density = current_density return (best_start, best_end) def render_html(self, highlight_locations=None, start_offset=None, end_offset=None): # Start by chopping the block down to the proper window. 
text = self.text_block[start_offset:end_offset] # Invert highlight_locations to a location -> term list term_list = [] for term, locations in highlight_locations.items(): term_list += [(loc - start_offset, term) for loc in locations] loc_to_term = sorted(term_list) # Prepare the highlight template if self.css_class: hl_start = '<%s class="%s">' % (self.html_tag, self.css_class) else: hl_start = '<%s>' % (self.html_tag) hl_end = '' % self.html_tag # Copy the part from the start of the string to the first match, # and there replace the match with a highlighted version. highlighted_chunk = "" matched_so_far = 0 prev = 0 prev_str = "" for cur, cur_str in loc_to_term: # This can be in a different case than cur_str actual_term = text[cur:cur + len(cur_str)] # Handle incorrect highlight_locations by first checking for the term if actual_term.lower() == cur_str: if cur < prev + len(prev_str): continue highlighted_chunk += text[prev + len(prev_str):cur] + hl_start + actual_term + hl_end prev = cur prev_str = cur_str # Keep track of how far we've copied so far, for the last step matched_so_far = cur + len(actual_term) # Don't forget the chunk after the last term highlighted_chunk += text[matched_so_far:] if start_offset > 0: highlighted_chunk = '...%s' % highlighted_chunk if end_offset < len(self.text_block): highlighted_chunk = '%s...' 
% highlighted_chunk return highlighted_chunk django-haystack-2.8.0/haystack/utils/loading.py000066400000000000000000000303021325051407000215210ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import copy import inspect import threading import warnings from collections import OrderedDict from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.utils import six from django.utils.module_loading import module_has_submodule from haystack import constants from haystack.exceptions import NotHandled, SearchFieldError from haystack.utils import importlib from haystack.utils.app_loading import haystack_get_app_modules def import_class(path): path_bits = path.split('.') # Cut off the class name at the end. class_name = path_bits.pop() module_path = '.'.join(path_bits) module_itself = importlib.import_module(module_path) if not hasattr(module_itself, class_name): raise ImportError("The Python module '%s' has no '%s' class." % (module_path, class_name)) return getattr(module_itself, class_name) # Load the search backend. def load_backend(full_backend_path): """ Loads a backend for interacting with the search engine. Requires a ``backend_path``. It should be a string resembling a Python import path, pointing to a ``BaseEngine`` subclass. The built-in options available include:: * haystack.backends.solr.SolrEngine * haystack.backends.xapian.XapianEngine (third-party) * haystack.backends.whoosh.WhooshEngine * haystack.backends.simple.SimpleEngine If you've implemented a custom backend, you can provide the path to your backend & matching ``Engine`` class. For example:: ``myapp.search_backends.CustomSolrEngine`` """ path_bits = full_backend_path.split('.') if len(path_bits) < 2: raise ImproperlyConfigured("The provided backend '%s' is not a complete Python path to a BaseEngine subclass." 
% full_backend_path) return import_class(full_backend_path) def load_router(full_router_path): """ Loads a router for choosing which connection to use. Requires a ``full_router_path``. It should be a string resembling a Python import path, pointing to a ``BaseRouter`` subclass. The built-in options available include:: * haystack.routers.DefaultRouter If you've implemented a custom backend, you can provide the path to your backend & matching ``Engine`` class. For example:: ``myapp.search_routers.MasterSlaveRouter`` """ path_bits = full_router_path.split('.') if len(path_bits) < 2: raise ImproperlyConfigured("The provided router '%s' is not a complete Python path to a BaseRouter subclass." % full_router_path) return import_class(full_router_path) class ConnectionHandler(object): def __init__(self, connections_info): self.connections_info = connections_info self.thread_local = threading.local() self._index = None def ensure_defaults(self, alias): try: conn = self.connections_info[alias] except KeyError: raise ImproperlyConfigured("The key '%s' isn't an available connection." 
% alias) if not conn.get('ENGINE'): conn['ENGINE'] = 'haystack.backends.simple_backend.SimpleEngine' def __getitem__(self, key): if not hasattr(self.thread_local, 'connections'): self.thread_local.connections = {} elif key in self.thread_local.connections: return self.thread_local.connections[key] self.ensure_defaults(key) self.thread_local.connections[key] = load_backend(self.connections_info[key]['ENGINE'])(using=key) return self.thread_local.connections[key] def reload(self, key): if not hasattr(self.thread_local, 'connections'): self.thread_local.connections = {} try: del self.thread_local.connections[key] except KeyError: pass return self.__getitem__(key) def all(self): return [self[alias] for alias in self.connections_info] class ConnectionRouter(object): def __init__(self): self._routers = None @property def routers(self): if self._routers is None: default_routers = ['haystack.routers.DefaultRouter'] router_list = getattr(settings, 'HAYSTACK_ROUTERS', default_routers) # in case HAYSTACK_ROUTERS is empty, fallback to default routers if not len(router_list): router_list = default_routers self._routers = [] for router_path in router_list: router_class = load_router(router_path) self._routers.append(router_class()) return self._routers def _for_action(self, action, many, **hints): conns = [] for router in self.routers: if hasattr(router, action): action_callable = getattr(router, action) connection_to_use = action_callable(**hints) if connection_to_use is not None: if isinstance(connection_to_use, six.string_types): conns.append(connection_to_use) else: conns.extend(connection_to_use) if not many: break return conns def for_write(self, **hints): return self._for_action('for_write', True, **hints) def for_read(self, **hints): return self._for_action('for_read', False, **hints)[0] class UnifiedIndex(object): # Used to collect all the indexes into a cohesive whole. 
def __init__(self, excluded_indexes=None): self._indexes = {} self.fields = OrderedDict() self._built = False self.excluded_indexes = excluded_indexes or [] self.excluded_indexes_ids = {} self.document_field = constants.DOCUMENT_FIELD self._fieldnames = {} self._facet_fieldnames = {} @property def indexes(self): warnings.warn("'UnifiedIndex.indexes' was deprecated in Haystack v2.3.0. Please use UnifiedIndex.get_indexes().") return self._indexes def collect_indexes(self): indexes = [] for app_mod in haystack_get_app_modules(): try: search_index_module = importlib.import_module("%s.search_indexes" % app_mod.__name__) except ImportError: if module_has_submodule(app_mod, 'search_indexes'): raise continue for item_name, item in inspect.getmembers(search_index_module, inspect.isclass): if getattr(item, 'haystack_use_for_indexing', False) and getattr(item, 'get_model', None): # We've got an index. Check if we should be ignoring it. class_path = "%s.search_indexes.%s" % (app_mod.__name__, item_name) if class_path in self.excluded_indexes or self.excluded_indexes_ids.get(item_name) == id(item): self.excluded_indexes_ids[str(item_name)] = id(item) continue indexes.append(item()) return indexes def reset(self): self._indexes = {} self.fields = OrderedDict() self._built = False self._fieldnames = {} self._facet_fieldnames = {} def build(self, indexes=None): self.reset() if indexes is None: indexes = self.collect_indexes() for index in indexes: model = index.get_model() if model in self._indexes: raise ImproperlyConfigured( "Model '%s' has more than one 'SearchIndex`` handling it. " "Please exclude either '%s' or '%s' using the 'EXCLUDED_INDEXES' " "setting defined in 'settings.HAYSTACK_CONNECTIONS'." 
% ( model, self._indexes[model], index ) ) self._indexes[model] = index self.collect_fields(index) self._built = True def collect_fields(self, index): for fieldname, field_object in index.fields.items(): if field_object.document is True: if field_object.index_fieldname != self.document_field: raise SearchFieldError("All 'SearchIndex' classes must use the same '%s' fieldname for the 'document=True' field. Offending index is '%s'." % (self.document_field, index)) # Stow the index_fieldname so we don't have to get it the hard way again. if fieldname in self._fieldnames and field_object.index_fieldname != self._fieldnames[fieldname]: # We've already seen this field in the list. Raise an exception if index_fieldname differs. raise SearchFieldError("All uses of the '%s' field need to use the same 'index_fieldname' attribute." % fieldname) self._fieldnames[fieldname] = field_object.index_fieldname # Stow the facet_fieldname so we don't have to look that up either. if hasattr(field_object, 'facet_for'): if field_object.facet_for: self._facet_fieldnames[field_object.facet_for] = fieldname else: self._facet_fieldnames[field_object.instance_name] = fieldname # Copy the field in so we've got a unified schema. if field_object.index_fieldname not in self.fields: self.fields[field_object.index_fieldname] = field_object self.fields[field_object.index_fieldname] = copy.copy(field_object) else: # If the field types are different, we can mostly # safely ignore this. The exception is ``MultiValueField``, # in which case we'll use it instead, copying over the # values. if field_object.is_multivalued: old_field = self.fields[field_object.index_fieldname] self.fields[field_object.index_fieldname] = field_object self.fields[field_object.index_fieldname] = copy.copy(field_object) # Switch it so we don't have to dupe the remaining # checks. field_object = old_field # We've already got this field in the list. 
Ensure that # what we hand back is a superset of all options that # affect the schema. if field_object.indexed is True: self.fields[field_object.index_fieldname].indexed = True if field_object.stored is True: self.fields[field_object.index_fieldname].stored = True if field_object.faceted is True: self.fields[field_object.index_fieldname].faceted = True if field_object.use_template is True: self.fields[field_object.index_fieldname].use_template = True if field_object.null is True: self.fields[field_object.index_fieldname].null = True def get_indexes(self): if not self._built: self.build() return self._indexes def get_indexed_models(self): # Ensuring a list here since Python3 will give us an iterator return list(self.get_indexes().keys()) def get_index_fieldname(self, field): if not self._built: self.build() return self._fieldnames.get(field) or field def get_index(self, model_klass): indexes = self.get_indexes() if model_klass not in indexes: raise NotHandled('The model %s is not registered' % model_klass) return indexes[model_klass] def get_facet_fieldname(self, field): if not self._built: self.build() for fieldname, field_object in self.fields.items(): if fieldname != field: continue if hasattr(field_object, 'facet_for'): if field_object.facet_for: return field_object.facet_for else: return field_object.instance_name else: return self._facet_fieldnames.get(field) or field return field def all_searchfields(self): if not self._built: self.build() return self.fields django-haystack-2.8.0/haystack/utils/log.py000066400000000000000000000011031325051407000206620ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import logging from django.conf import settings def getLogger(name): real_logger = logging.getLogger(name) return LoggingFacade(real_logger) class LoggingFacade(object): def __init__(self, real_logger): self.real_logger = real_logger def noop(self, *args, **kwargs): pass def 
__getattr__(self, attr): if getattr(settings, 'HAYSTACK_LOGGING', True): return getattr(self.real_logger, attr) return self.noop django-haystack-2.8.0/haystack/views.py000066400000000000000000000155331325051407000201120ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf import settings from django.core.paginator import InvalidPage, Paginator from django.http import Http404 from django.shortcuts import render from haystack.forms import FacetedSearchForm, ModelSearchForm from haystack.query import EmptySearchQuerySet RESULTS_PER_PAGE = getattr(settings, 'HAYSTACK_SEARCH_RESULTS_PER_PAGE', 20) class SearchView(object): template = 'search/search.html' extra_context = {} query = '' results = EmptySearchQuerySet() request = None form = None results_per_page = RESULTS_PER_PAGE def __init__(self, template=None, load_all=True, form_class=None, searchqueryset=None, results_per_page=None): self.load_all = load_all self.form_class = form_class self.searchqueryset = searchqueryset if form_class is None: self.form_class = ModelSearchForm if not results_per_page is None: self.results_per_page = results_per_page if template: self.template = template def __call__(self, request): """ Generates the actual response to the search. Relies on internal, overridable methods to construct the response. """ self.request = request self.form = self.build_form() self.query = self.get_query() self.results = self.get_results() return self.create_response() def build_form(self, form_kwargs=None): """ Instantiates the form the class should use to process the search query. """ data = None kwargs = { 'load_all': self.load_all, } if form_kwargs: kwargs.update(form_kwargs) if len(self.request.GET): data = self.request.GET if self.searchqueryset is not None: kwargs['searchqueryset'] = self.searchqueryset return self.form_class(data, **kwargs) def get_query(self): """ Returns the query provided by the user. 
Returns an empty string if the query is invalid. """ if self.form.is_valid(): return self.form.cleaned_data['q'] return '' def get_results(self): """ Fetches the results via the form. Returns an empty list if there's no query to search with. """ return self.form.search() def build_page(self): """ Paginates the results appropriately. In case someone does not want to use Django's built-in pagination, it should be a simple matter to override this method to do what they would like. """ try: page_no = int(self.request.GET.get('page', 1)) except (TypeError, ValueError): raise Http404("Not a valid number for page.") if page_no < 1: raise Http404("Pages should be 1 or greater.") start_offset = (page_no - 1) * self.results_per_page self.results[start_offset:start_offset + self.results_per_page] paginator = Paginator(self.results, self.results_per_page) try: page = paginator.page(page_no) except InvalidPage: raise Http404("No such page!") return (paginator, page) def extra_context(self): """ Allows the addition of more context variables as needed. Must return a dictionary. """ return {} def get_context(self): (paginator, page) = self.build_page() context = { 'query': self.query, 'form': self.form, 'page': page, 'paginator': paginator, 'suggestion': None, } if hasattr(self.results, 'query') and self.results.query.backend.include_spelling: context['suggestion'] = self.form.get_suggestion() context.update(self.extra_context()) return context def create_response(self): """ Generates the actual HttpResponse to send back to the user. """ context = self.get_context() return render(self.request, self.template, context) def search_view_factory(view_class=SearchView, *args, **kwargs): def search_view(request): return view_class(*args, **kwargs)(request) return search_view class FacetedSearchView(SearchView): def __init__(self, *args, **kwargs): # Needed to switch out the default form class. 
if kwargs.get('form_class') is None: kwargs['form_class'] = FacetedSearchForm super(FacetedSearchView, self).__init__(*args, **kwargs) def build_form(self, form_kwargs=None): if form_kwargs is None: form_kwargs = {} # This way the form can always receive a list containing zero or more # facet expressions: form_kwargs['selected_facets'] = self.request.GET.getlist("selected_facets") return super(FacetedSearchView, self).build_form(form_kwargs) def extra_context(self): extra = super(FacetedSearchView, self).extra_context() extra['request'] = self.request extra['facets'] = self.results.facet_counts() return extra def basic_search(request, template='search/search.html', load_all=True, form_class=ModelSearchForm, searchqueryset=None, extra_context=None, results_per_page=None): """ A more traditional view that also demonstrate an alternative way to use Haystack. Useful as an example of for basing heavily custom views off of. Also has the benefit of thread-safety, which the ``SearchView`` class may not be. Template:: ``search/search.html`` Context:: * form An instance of the ``form_class``. (default: ``ModelSearchForm``) * page The current page of search results. * paginator A paginator instance for the results. * query The query received by the form. 
""" query = '' results = EmptySearchQuerySet() if request.GET.get('q'): form = form_class(request.GET, searchqueryset=searchqueryset, load_all=load_all) if form.is_valid(): query = form.cleaned_data['q'] results = form.search() else: form = form_class(searchqueryset=searchqueryset, load_all=load_all) paginator = Paginator(results, results_per_page or RESULTS_PER_PAGE) try: page = paginator.page(int(request.GET.get('page', 1))) except InvalidPage: raise Http404("No such page of results!") context = { 'form': form, 'page': page, 'paginator': paginator, 'query': query, 'suggestion': None, } if results.query.backend.include_spelling: context['suggestion'] = form.get_suggestion() if extra_context: context.update(extra_context) return render(request, template, context) django-haystack-2.8.0/setup.cfg000066400000000000000000000003161325051407000164060ustar00rootroot00000000000000[pep8] max-line-length=110 exclude=docs [flake8] max-line-length=110 exclude=docs [frosted] max-line-length=110 exclude=docs [isort] line_length=110 default_section=THIRDPARTY known_first_party=haystack django-haystack-2.8.0/setup.py000077500000000000000000000035531325051407000163100ustar00rootroot00000000000000#!/usr/bin/env python # encoding: utf-8 # n.b. 
we can't have unicode_literals here due to http://bugs.python.org/setuptools/issue152 from __future__ import absolute_import, division, print_function try: from setuptools import setup except ImportError: from ez_setup import use_setuptools use_setuptools() from setuptools import setup install_requires = [ 'Django>=1.11', ] tests_require = [ 'pysolr>=3.7.0', 'whoosh>=2.5.4,<3.0', 'python-dateutil', 'geopy==0.95.1', 'nose', 'mock', 'coverage', 'requests', ] setup( name='django-haystack', use_scm_version=True, description='Pluggable search for Django.', author='Daniel Lindsley', author_email='daniel@toastdriven.com', long_description=open('README.rst', 'r').read(), url='http://haystacksearch.org/', packages=[ 'haystack', 'haystack.backends', 'haystack.management', 'haystack.management.commands', 'haystack.templatetags', 'haystack.utils', ], package_data={ 'haystack': [ 'templates/panels/*', 'templates/search_configuration/*', ] }, classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Web Environment', 'Framework :: Django', 'Framework :: Django :: 1.11', 'Framework :: Django :: 2.0', 'Intended Audience :: Developers', 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 2', 'Programming Language :: Python :: 3', 'Topic :: Utilities', ], zip_safe=False, install_requires=install_requires, tests_require=tests_require, test_suite="test_haystack.run_tests.run_all", setup_requires=['setuptools_scm'], ) django-haystack-2.8.0/test_haystack/000077500000000000000000000000001325051407000174335ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/__init__.py000066400000000000000000000010441325051407000215430ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import import os test_runner = None old_config = None os.environ['DJANGO_SETTINGS_MODULE'] = 'test_haystack.settings' import django django.setup() def setup(): global 
test_runner global old_config from django.test.runner import DiscoverRunner test_runner = DiscoverRunner() test_runner.setup_test_environment() old_config = test_runner.setup_databases() def teardown(): test_runner.teardown_databases(old_config) test_runner.teardown_test_environment() django-haystack-2.8.0/test_haystack/core/000077500000000000000000000000001325051407000203635ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/core/__init__.py000066400000000000000000000000001325051407000224620ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/core/admin.py000066400000000000000000000006321325051407000220260ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.contrib import admin from haystack.admin import SearchModelAdmin from .models import MockModel class MockModelAdmin(SearchModelAdmin): haystack_connection = 'solr' date_hierarchy = 'pub_date' list_display = ('author', 'pub_date') admin.site.register(MockModel, MockModelAdmin) django-haystack-2.8.0/test_haystack/core/custom_identifier.py000066400000000000000000000007051325051407000244530ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import hashlib def get_identifier_method(key): """ Custom get_identifier method used for testing the setting HAYSTACK_IDENTIFIER_MODULE """ if hasattr(key, 'get_custom_haystack_id'): return key.get_custom_haystack_id() else: key_bytes = key.encode('utf-8') return hashlib.md5(key_bytes).hexdigest() django-haystack-2.8.0/test_haystack/core/fixtures/000077500000000000000000000000001325051407000222345ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/core/fixtures/base_data.json000066400000000000000000000027401325051407000250350ustar00rootroot00000000000000[ { "pk": 1, "model": "core.mocktag", "fields": { "name": "primary" } }, { "pk": 2, "model": "core.mocktag", "fields": { 
"name": "secondary" } }, { "pk": 1, "model": "core.mockmodel", "fields": { "author": "daniel1", "foo": "bar", "pub_date": "2009-03-17 06:00:00", "tag": 1 } }, { "pk": 2, "model": "core.mockmodel", "fields": { "author": "daniel2", "foo": "bar", "pub_date": "2009-03-17 07:00:00", "tag": 1 } }, { "pk": 3, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "bar", "pub_date": "2009-03-17 08:00:00", "tag": 2 } }, { "pk": "sometext", "model": "core.charpkmockmodel", "fields": { } }, { "pk": "1234", "model": "core.charpkmockmodel", "fields": { } }, { "pk": 1, "model": "core.afifthmockmodel", "fields": { "author": "sam1", "deleted": false } }, { "pk": 2, "model": "core.afifthmockmodel", "fields": { "author": "sam2", "deleted": true } }, { "pk": "53554c58-7051-4350-bcc9-dad75eb248a9", "model": "core.uuidmockmodel", "fields": { "characteristics": "some text that was indexed" } }, { "pk": "77554c58-7051-4350-bcc9-dad75eb24888", "model": "core.uuidmockmodel", "fields": { "characteristics": "more text that was indexed" } } ] django-haystack-2.8.0/test_haystack/core/fixtures/bulk_data.json000066400000000000000000000327611325051407000250660ustar00rootroot00000000000000[ { "pk": 1, "model": "core.mocktag", "fields": { "name": "search_test" } }, { "pk": 1, "model": "core.mockmodel", "fields": { "author": "daniel1", "foo": "Registering indexes in Haystack is very similar to registering models and ``ModelAdmin`` classes in the `Django admin site`_. If you want to override the default indexing behavior for your model you can specify your own ``SearchIndex`` class. This is useful for ensuring that future-dated or non-live content is not indexed and searchable. 
Our ``Note`` model has a ``pub_date`` field, so let's update our code to include our own ``SearchIndex`` to exclude indexing future-dated notes:", "pub_date": "2009-06-18 06:00:00", "tag": 1 } }, { "pk": 2, "model": "core.mockmodel", "fields": { "author": "daniel2", "foo": "In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.", "pub_date": "2009-07-17 00:30:00", "tag": 1 } }, { "pk": 3, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "Every custom ``SearchIndex`` requires there be one and only one field with ``document=True``. This is the primary field that will get passed to the backend for indexing. For this field, you'll then need to create a template at ``search/indexes/myapp/note_text.txt``. This allows you to customize the document that will be passed to the search backend for indexing. A sample template might look like:", "pub_date": "2009-06-18 08:00:00", "tag": 1 } }, { "pk": 4, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "The exception to this is the TemplateField class. This take either no arguments or an explicit template name to populate their contents. You can find more information about them in the SearchIndex API reference.", "pub_date": "2009-07-17 01:30:00", "tag": 1 } }, { "pk": 5, "model": "core.mockmodel", "fields": { "author": "daniel1", "foo": "This will pull in the default URLconf for Haystack. It consists of a single URLconf that points to a SearchView instance. 
You can change this class’s behavior by passing it any of several keyword arguments or override it entirely with your own view.", "pub_date": "2009-07-17 02:30:00", "tag": 1 } }, { "pk": 6, "model": "core.mockmodel", "fields": { "author": "daniel1", "foo": "This will create a default SearchIndex instance, search through all of your INSTALLED_APPS for search_indexes.py and register all SearchIndexes with the default SearchIndex. If autodiscovery and inclusion of all indexes is not desirable, you can manually register models in the following manner:", "pub_date": "2009-07-17 03:30:00", "tag": 1 } }, { "pk": 7, "model": "core.mockmodel", "fields": { "author": "daniel1", "foo": "The SearchBackend class handles interaction directly with the backend. The search query it performs is usually fed to it from a SearchQuery class that has been built for that backend. This class must be at least partially implemented on a per-backend basis and is usually accompanied by a SearchQuery class within the same module.", "pub_date": "2009-07-17 04:30:00", "tag": 1 } }, { "pk": 8, "model": "core.mockmodel", "fields": { "author": "daniel2", "foo": "Takes a query to search on and returns dictionary. The query should be a string that is appropriate syntax for the backend. The returned dictionary should contain the keys ‘results’ and ‘hits’. The ‘results’ value should be an iterable of populated SearchResult objects. The ‘hits’ should be an integer count of the number of matched results the search backend found. This method MUST be implemented by each backend, as it will be highly specific to each one.", "pub_date": "2009-07-17 05:30:00", "tag": 1 } }, { "pk": 9, "model": "core.mockmodel", "fields": { "author": "daniel1", "foo": "The SearchQuery class acts as an intermediary between SearchQuerySet‘s abstraction and SearchBackend‘s actual search. Given the metadata provided by SearchQuerySet, SearchQuery build the actual query and interacts with the SearchBackend on SearchQuerySet‘s behalf. 
This class must be at least partially implemented on a per-backend basis, as portions are highly specific to the backend. It usually is bundled with the accompanying SearchBackend.", "pub_date": "2009-07-17 06:30:00", "tag": 1 } }, { "pk": 10, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "Most people will NOT have to use this class directly. SearchQuerySet handles all interactions with SearchQuery objects and provides a nicer interface to work with. Should you need advanced/custom behavior, you can supply your version of SearchQuery that overrides/extends the class in the manner you see fit. SearchQuerySet objects take a kwarg parameter query where you can pass in your class.", "pub_date": "2009-07-17 07:30:00", "tag": 1 } }, { "pk": 11, "model": "core.mockmodel", "fields": { "author": "daniel1", "foo": "The SearchQuery object maintains a list of QueryFilter objects. Each filter object supports what field it looks up against, what kind of lookup (i.e. the __’s), what value it’s looking for and if it’s a AND/OR/NOT. The SearchQuery object’s “build_query” method should then iterate over that list and convert that to a valid query for the search backend.", "pub_date": "2009-07-17 08:30:00", "tag": 1 } }, { "pk": 12, "model": "core.mockmodel", "fields": { "author": "daniel2", "foo": "The SearchSite provides a way to collect the SearchIndexes that are relevant to the current site, much like ModelAdmins in the admin app. This allows you to register indexes on models you don’t control (reusable apps, django.contrib, etc.) as well as customize on a per-site basis what indexes should be available (different indexes for different sites, same codebase).", "pub_date": "2009-07-17 09:30:00", "tag": 1 } }, { "pk": 13, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "If you need to narrow the indexes that get registered, you will need to manipulate a SearchSite. There are two ways to go about this, via either register or unregister. 
If you want most of the indexes but want to forgo a specific one(s), you can setup the main site via autodiscover then simply unregister the one(s) you don’t want.:", "pub_date": "2009-07-17 10:30:00", "tag": 1 } }, { "pk": 14, "model": "core.mockmodel", "fields": { "author": "daniel2", "foo": "The SearchIndex class allows the application developer a way to provide data to the backend in a structured format. Developers familiar with Django’s Form or Model classes should find the syntax for indexes familiar. This class is arguably the most important part of integrating Haystack into your application, as it has a large impact on the quality of the search results and how easy it is for users to find what they’re looking for. Care and effort should be put into making your indexes the best they can be.", "pub_date": "2009-07-17 11:30:00", "tag": 1 } }, { "pk": 15, "model": "core.mockmodel", "fields": { "author": "daniel2", "foo": "Unlike relational databases, most search engines supported by Haystack are primarily document-based. They focus on a single text blob which they tokenize, analyze and index. When searching, this field is usually the primary one that is searched. Further, the schema used by most engines is the same for all types of data added, unlike a relational database that has a table schema for each chunk of data. It may be helpful to think of your search index as something closer to a key-value store instead of imagining it in terms of a RDBMS.", "pub_date": "2009-07-17 12:30:00", "tag": 1 } }, { "pk": 16, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "Common uses include storing pertinent data information, categorizations of the document, author information and related data. By adding fields for these pieces of data, you provide a means to further narrow/filter search terms. 
This can be useful from either a UI perspective (a better advanced search form) or from a developer standpoint (section-dependent search, off-loading certain tasks to search, et cetera).", "pub_date": "2009-07-17 13:30:00", "tag": 1 } }, { "pk": 17, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "Most search engines that were candidates for inclusion in Haystack all had a central concept of a document that they indexed. These documents form a corpus within which to primarily search. Because this ideal is so central and most of Haystack is designed to have pluggable backends, it is important to ensure that all engines have at least a bare minimum of the data they need to function.", "pub_date": "2009-07-17 14:30:00", "tag": 1 } }, { "pk": 18, "model": "core.mockmodel", "fields": { "author": "daniel1", "foo": "As a result, when creating a SearchIndex, at least one field must be marked with document=True. This signifies to Haystack that whatever is placed in this field while indexing is to be the primary text the search engine indexes. The name of this field can be almost anything, but text is one of the more common names used.", "pub_date": "2009-07-17 15:30:00", "tag": 1 } }, { "pk": 19, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "One shortcoming of the use of search is that you rarely have all or the most up-to-date information about an object in the index. As a result, when retrieving search results, you will likely have to access the object in the database to provide better information. However, this can also hit the database quite heavily (think .get(pk=result.id) per object). If your search is popular, this can lead to a big performance hit. There are two ways to prevent this. The first way is SearchQuerySet.load_all, which tries to group all similar objects and pull them though one query instead of many. 
This still hits the DB and incurs a performance penalty.", "pub_date": "2009-07-17 16:30:00", "tag": 1 } }, { "pk": 20, "model": "core.mockmodel", "fields": { "author": "daniel2", "foo": "The other option is to leverage stored fields. By default, all fields in Haystack are both indexed (searchable by the engine) and stored (retained by the engine and presented in the results). By using a stored field, you can store commonly used data in such a way that you don’t need to hit the database when processing the search result to get more information. By the way: Jenny's number is 867-5309", "pub_date": "2009-07-17 17:30:00", "tag": 1 } }, { "pk": 21, "model": "core.mockmodel", "fields": { "author": "daniel2", "foo": "For example, one great way to leverage this is to pre-rendering an object’s search result template DURING indexing. You define an additional field, render a template with it and it follows the main indexed record into the index. Then, when that record is pulled when it matches a query, you can simply display the contents of that field, which avoids the database hit.:", "pub_date": "2009-07-17 18:30:00", "tag": 1 } }, { "pk": 22, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "However, sometimes, even more control over what gets placed in your index is needed. To facilitate this, SearchIndex objects have a ‘preparation’ stage that populates data just before it is indexed. You can hook into this phase in several ways. This should be very familiar to developers who have used Django’s forms before as it loosely follows similar concepts, though the emphasis here is less on cleansing data from user input and more on making the data friendly to the search backend.", "pub_date": "2009-07-17 19:30:00", "tag": 1 } }, { "pk": 23, "model": "core.mockmodel", "fields": { "author": "daniel3", "foo": "Each SearchIndex gets a prepare method, which handles collecting all the data. 
This method should return a dictionary that will be the final data used by the search backend. Overriding this method is useful if you need to collect more than one piece of data or need to incorporate additional data that is not well represented by a single SearchField. An example might look like:", "pub_date": "2009-07-17 20:30:00", "tag": 1 } }, { "pk": 1, "model": "core.anothermockmodel", "fields": { "author": "daniel3", "pub_date": "2009-07-17 21:30:00" } }, { "pk": 2, "model": "core.anothermockmodel", "fields": { "author": "daniel3", "pub_date": "2009-07-17 22:30:00" } }, { "pk": 1, "model": "core.ScoreMockModel", "fields": { "score": "42" } } ] django-haystack-2.8.0/test_haystack/core/models.py000066400000000000000000000054401325051407000222230ustar00rootroot00000000000000# encoding: utf-8 # A couple models for Haystack to test with. from __future__ import absolute_import, division, print_function, unicode_literals import datetime import uuid from django.db import models class MockTag(models.Model): name = models.CharField(max_length=32) class MockModel(models.Model): author = models.CharField(max_length=255) foo = models.CharField(max_length=255, blank=True) pub_date = models.DateTimeField(default=datetime.datetime.now) tag = models.ForeignKey(MockTag, models.CASCADE) def __unicode__(self): return self.author def hello(self): return 'World!' 
class UUIDMockModel(models.Model): id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False) characteristics = models.TextField() def __unicode__(self): return str(self.id) class AnotherMockModel(models.Model): author = models.CharField(max_length=255) pub_date = models.DateTimeField(default=datetime.datetime.now) def __unicode__(self): return self.author class AThirdMockModel(AnotherMockModel): average_delay = models.FloatField(default=0.0) view_count = models.PositiveIntegerField(default=0) class CharPKMockModel(models.Model): key = models.CharField(primary_key=True, max_length=10) class AFourthMockModel(models.Model): author = models.CharField(max_length=255) editor = models.CharField(max_length=255) pub_date = models.DateTimeField(default=datetime.datetime.now) def __unicode__(self): return self.author class SoftDeleteManager(models.Manager): def get_queryset(self): return super(SoftDeleteManager, self).get_queryset().filter(deleted=False) def complete_set(self): return super(SoftDeleteManager, self).get_queryset() class AFifthMockModel(models.Model): author = models.CharField(max_length=255) deleted = models.BooleanField(default=False) objects = SoftDeleteManager() def __unicode__(self): return self.author class ASixthMockModel(models.Model): name = models.CharField(max_length=255) lat = models.FloatField() lon = models.FloatField() def __unicode__(self): return self.name class ScoreMockModel(models.Model): score = models.CharField(max_length=10) def __unicode__(self): return self.score class ManyToManyLeftSideModel(models.Model): related_models = models.ManyToManyField('ManyToManyRightSideModel') class ManyToManyRightSideModel(models.Model): name = models.CharField(max_length=32, default='Default name') def __unicode__(self): return self.name class OneToManyLeftSideModel(models.Model): pass class OneToManyRightSideModel(models.Model): left_side = models.ForeignKey(OneToManyLeftSideModel, models.CASCADE, related_name='right_side') 
django-haystack-2.8.0/test_haystack/core/templates/000077500000000000000000000000001325051407000223615ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/core/templates/404.html000066400000000000000000000000311325051407000235500ustar00rootroot00000000000000{% extends 'base.html' %}django-haystack-2.8.0/test_haystack/core/templates/base.html000066400000000000000000000000001325051407000241470ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/core/templates/search/000077500000000000000000000000001325051407000236265ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/core/templates/search/indexes/000077500000000000000000000000001325051407000252655ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/core/templates/search/indexes/bar.txt000066400000000000000000000000051325051407000265650ustar00rootroot00000000000000BAR! django-haystack-2.8.0/test_haystack/core/templates/search/indexes/core/000077500000000000000000000000001325051407000262155ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/core/templates/search/indexes/core/mockmodel_content.txt000066400000000000000000000000301325051407000324530ustar00rootroot00000000000000Indexed! {{ object.pk }}django-haystack-2.8.0/test_haystack/core/templates/search/indexes/core/mockmodel_extra.txt000066400000000000000000000000271325051407000321320ustar00rootroot00000000000000Stored! {{ object.pk }}django-haystack-2.8.0/test_haystack/core/templates/search/indexes/core/mockmodel_template.txt000066400000000000000000000000301325051407000326140ustar00rootroot00000000000000Indexed! {{ object.pk }}django-haystack-2.8.0/test_haystack/core/templates/search/indexes/core/mockmodel_text.txt000066400000000000000000000000301325051407000317650ustar00rootroot00000000000000Indexed! {{ object.pk }}django-haystack-2.8.0/test_haystack/core/templates/search/indexes/foo.txt000066400000000000000000000000051325051407000266040ustar00rootroot00000000000000FOO! 
django-haystack-2.8.0/test_haystack/core/templates/search/search.html000066400000000000000000000000311325051407000257530ustar00rootroot00000000000000{% extends 'base.html' %}django-haystack-2.8.0/test_haystack/core/templates/test_suggestion.html000066400000000000000000000000341325051407000264720ustar00rootroot00000000000000Suggestion: {{ suggestion }}django-haystack-2.8.0/test_haystack/core/urls.py000066400000000000000000000015301325051407000217210ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf.urls import include, url from django.contrib import admin from haystack.forms import FacetedSearchForm from haystack.query import SearchQuerySet from haystack.views import FacetedSearchView, SearchView, basic_search admin.autodiscover() urlpatterns = [ url(r'^admin/', admin.site.urls), url(r'^$', SearchView(load_all=False), name='haystack_search'), url(r'^faceted/$', FacetedSearchView(searchqueryset=SearchQuerySet().facet('author'), form_class=FacetedSearchForm), name='haystack_faceted_search'), url(r'^basic/$', basic_search, {'load_all': False}, name='haystack_basic_search'), ] urlpatterns += [ url(r'', include(('test_haystack.test_app_without_models.urls', 'app-without-models'))), ] django-haystack-2.8.0/test_haystack/discovery/000077500000000000000000000000001325051407000214425ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/discovery/__init__.py000066400000000000000000000000001325051407000235410ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/discovery/models.py000066400000000000000000000007011325051407000232750ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.db import models class Foo(models.Model): title = models.CharField(max_length=255) body = models.TextField() def __unicode__(self): return self.title class Bar(models.Model): author = 
models.CharField(max_length=255) content = models.TextField() def __unicode__(self): return self.author django-haystack-2.8.0/test_haystack/discovery/search_indexes.py000066400000000000000000000007621325051407000250050ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from test_haystack.discovery.models import Bar, Foo from haystack import indexes class FooIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, model_attr='body') def get_model(self): return Foo class BarIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) def get_model(self): return Bar django-haystack-2.8.0/test_haystack/discovery/templates/000077500000000000000000000000001325051407000234405ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/discovery/templates/search/000077500000000000000000000000001325051407000247055ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/discovery/templates/search/indexes/000077500000000000000000000000001325051407000263445ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/discovery/templates/search/indexes/bar_text.txt000066400000000000000000000000441325051407000307130ustar00rootroot00000000000000{{ object.title }} {{ object.body }}django-haystack-2.8.0/test_haystack/elasticsearch2_tests/000077500000000000000000000000001325051407000235515ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/elasticsearch2_tests/__init__.py000066400000000000000000000016571325051407000256730ustar00rootroot00000000000000# -*- coding: utf-8 -*- import warnings from django.conf import settings import unittest from haystack.utils import log as logging warnings.simplefilter('ignore', Warning) def setup(): log = logging.getLogger('haystack') try: import elasticsearch if not ((2, 0, 0) <= elasticsearch.__version__ < (3, 0, 0)): raise ImportError from elasticsearch import Elasticsearch, exceptions 
except ImportError: log.error("Skipping ElasticSearch 2 tests: 'elasticsearch>=2.0.0,<3.0.0' not installed.") raise unittest.SkipTest("'elasticsearch>=2.0.0,<3.0.0' not installed.") url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] es = Elasticsearch(url) try: es.info() except exceptions.ConnectionError as e: log.error("elasticsearch not running on %r" % url, exc_info=True) raise unittest.SkipTest("elasticsearch not running on %r" % url, e) django-haystack-2.8.0/test_haystack/elasticsearch2_tests/test_backend.py000066400000000000000000001727061325051407000265660ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals import datetime import logging as std_logging import operator import unittest from decimal import Decimal import elasticsearch from django.apps import apps from django.conf import settings from django.test import TestCase from django.test.utils import override_settings from haystack import connections, indexes, reset_search_queries from haystack.exceptions import SkipDocument from haystack.inputs import AutoQuery from haystack.models import SearchResult from haystack.query import RelatedSearchQuerySet, SearchQuerySet, SQ from haystack.utils import log as logging from haystack.utils.geo import Point from haystack.utils.loading import UnifiedIndex from ..core.models import AFourthMockModel, AnotherMockModel, ASixthMockModel, MockModel from ..mocks import MockSearchResult test_pickling = True try: import cPickle as pickle except ImportError: try: import pickle except ImportError: test_pickling = False def clear_elasticsearch_index(): # Wipe it clean. 
raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) try: raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) raw_es.indices.refresh() except elasticsearch.TransportError: pass # Since we've just completely deleted the index, we'll reset setup_complete so the next access will # correctly define the mappings: connections['elasticsearch'].get_backend().setup_complete = False class Elasticsearch2MockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) name = indexes.CharField(model_attr='author', faceted=True) pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class Elasticsearch2MockSearchIndexWithSkipDocument(Elasticsearch2MockSearchIndex): def prepare_text(self, obj): if obj.author == 'daniel3': raise SkipDocument return u"Indexed!\n%s" % obj.id class Elasticsearch2MockSpellingIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) name = indexes.CharField(model_attr='author', faceted=True) pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel def prepare_text(self, obj): return obj.foo class Elasticsearch2MaintainTypeMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) month = indexes.CharField(indexed=False) pub_date = indexes.DateTimeField(model_attr='pub_date') def prepare_month(self, obj): return "%02d" % obj.pub_date.month def get_model(self): return MockModel class Elasticsearch2MockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='foo', document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class Elasticsearch2AnotherMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = 
indexes.CharField(document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return AnotherMockModel def prepare_text(self, obj): return u"You might be searching for the user %s" % obj.author class Elasticsearch2BoostMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField( document=True, use_template=True, template_name='search/indexes/core/mockmodel_template.txt' ) author = indexes.CharField(model_attr='author', weight=2.0) editor = indexes.CharField(model_attr='editor') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return AFourthMockModel def prepare(self, obj): data = super(Elasticsearch2BoostMockSearchIndex, self).prepare(obj) if obj.pk == 4: data['boost'] = 5.0 return data class Elasticsearch2FacetingMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) author = indexes.CharField(model_attr='author', faceted=True) editor = indexes.CharField(model_attr='editor', faceted=True) pub_date = indexes.DateField(model_attr='pub_date', faceted=True) facet_field = indexes.FacetCharField(model_attr='author') def prepare_text(self, obj): return '%s %s' % (obj.author, obj.editor) def get_model(self): return AFourthMockModel class Elasticsearch2RoundTripSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, default='') name = indexes.CharField() is_active = indexes.BooleanField() post_count = indexes.IntegerField() average_rating = indexes.FloatField() price = indexes.DecimalField() pub_date = indexes.DateField() created = indexes.DateTimeField() tags = indexes.MultiValueField() sites = indexes.MultiValueField() def get_model(self): return MockModel def prepare(self, obj): prepped = super(Elasticsearch2RoundTripSearchIndex, self).prepare(obj) prepped.update({ 'text': 'This is some example text.', 'name': 'Mister Pants', 'is_active': True, 'post_count': 25, 
'average_rating': 3.6, 'price': Decimal('24.99'), 'pub_date': datetime.date(2009, 11, 21), 'created': datetime.datetime(2009, 11, 21, 21, 31, 00), 'tags': ['staff', 'outdoor', 'activist', 'scientist'], 'sites': [3, 5, 1], }) return prepped class Elasticsearch2ComplexFacetsMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, default='') name = indexes.CharField(faceted=True) is_active = indexes.BooleanField(faceted=True) post_count = indexes.IntegerField() post_count_i = indexes.FacetIntegerField(facet_for='post_count') average_rating = indexes.FloatField(faceted=True) pub_date = indexes.DateField(faceted=True) created = indexes.DateTimeField(faceted=True) sites = indexes.MultiValueField(faceted=True) def get_model(self): return MockModel class Elasticsearch2AutocompleteMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='foo', document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') text_auto = indexes.EdgeNgramField(model_attr='foo') name_auto = indexes.EdgeNgramField(model_attr='author') def get_model(self): return MockModel class Elasticsearch2SpatialSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='name', document=True) location = indexes.LocationField() def prepare_location(self, obj): return "%s,%s" % (obj.lat, obj.lon) def get_model(self): return ASixthMockModel class TestSettings(TestCase): def test_kwargs_are_passed_on(self): from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend backend = ElasticsearchSearchBackend('alias', **{ 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], 'INDEX_NAME': 'testing', 'KWARGS': {'max_retries': 42} }) self.assertEqual(backend.conn.transport.max_retries, 42) class Elasticsearch2SearchBackendTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchBackendTestCase, self).setUp() # Wipe it clean. 
self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.smmidni = Elasticsearch2MockSearchIndexWithSkipDocument() self.smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. self.sb.existing_mapping = {} self.sb.setup() self.sample_objs = [] for i in range(1, 4): mock = MockModel() mock.id = i mock.author = 'daniel%s' % i mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) self.sample_objs.append(mock) def tearDown(self): connections['elasticsearch']._index = self.old_ui super(Elasticsearch2SearchBackendTestCase, self).tearDown() self.sb.silently_fail = True def raw_search(self, query): try: return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) except elasticsearch.TransportError: return {} def test_non_silent(self): bad_sb = connections['elasticsearch'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', SILENTLY_FAIL=False, TIMEOUT=1) try: bad_sb.update(self.smmi, self.sample_objs) self.fail() except: pass try: bad_sb.remove('core.mockmodel.1') self.fail() except: pass try: bad_sb.clear() self.fail() except: pass try: bad_sb.search('foo') self.fail() except: pass def test_update_no_documents(self): url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME'] sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) self.assertEqual(sb.update(self.smmi, []), None) sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, 
SILENTLY_FAIL=False) try: sb.update(self.smmi, []) self.fail() except: pass def test_update(self): self.sb.update(self.smmi, self.sample_objs) # Check what Elasticsearch thinks is there. self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) self.assertEqual( sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], key=lambda x: x['id']), [ { 'django_id': '1', 'django_ct': 'core.mockmodel', 'name': 'daniel1', 'name_exact': 'daniel1', 'text': 'Indexed!\n1', 'pub_date': '2009-02-24T00:00:00', 'id': 'core.mockmodel.1' }, { 'django_id': '2', 'django_ct': 'core.mockmodel', 'name': 'daniel2', 'name_exact': 'daniel2', 'text': 'Indexed!\n2', 'pub_date': '2009-02-23T00:00:00', 'id': 'core.mockmodel.2' }, { 'django_id': '3', 'django_ct': 'core.mockmodel', 'name': 'daniel3', 'name_exact': 'daniel3', 'text': 'Indexed!\n3', 'pub_date': '2009-02-22T00:00:00', 'id': 'core.mockmodel.3' } ]) def test_update_with_SkipDocument_raised(self): self.sb.update(self.smmidni, self.sample_objs) # Check what Elasticsearch thinks is there. 
res = self.raw_search('*:*')['hits'] self.assertEqual(res['total'], 2) self.assertListEqual( sorted([x['_source']['id'] for x in res['hits']]), ['core.mockmodel.1', 'core.mockmodel.2'] ) def test_remove(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) self.sb.remove(self.sample_objs[0]) self.assertEqual(self.raw_search('*:*')['hits']['total'], 2) self.assertEqual(sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], key=operator.itemgetter('django_id')), [ { 'django_id': '2', 'django_ct': 'core.mockmodel', 'name': 'daniel2', 'name_exact': 'daniel2', 'text': 'Indexed!\n2', 'pub_date': '2009-02-23T00:00:00', 'id': 'core.mockmodel.2' }, { 'django_id': '3', 'django_ct': 'core.mockmodel', 'name': 'daniel3', 'name_exact': 'daniel3', 'text': 'Indexed!\n3', 'pub_date': '2009-02-22T00:00:00', 'id': 'core.mockmodel.3' } ]) def test_remove_succeeds_on_404(self): self.sb.silently_fail = False self.sb.remove('core.mockmodel.421') def test_clear(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) self.sb.clear() self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) self.sb.clear([AnotherMockModel]) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) self.sb.clear([MockModel]) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) self.sb.clear([AnotherMockModel, MockModel]) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) def test_search(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) self.assertEqual(self.sb.search(''), {'hits': 0, 
'results': []}) self.assertEqual(self.sb.search('*:*')['hits'], 3) self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), {u'2', u'1', u'3'}) self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) self.assertEqual( sorted([result.highlighted[0] for result in self.sb.search('Index', highlight=True)['results']]), [u'Indexed!\n1', u'Indexed!\n2', u'Indexed!\n3']) self.assertEqual(self.sb.search('Indx')['hits'], 0) self.assertEqual(self.sb.search('indaxed')['spelling_suggestion'], 'indexed') self.assertEqual(self.sb.search('arf', spelling_query='indexyd')['spelling_suggestion'], 'indexed') self.assertEqual(self.sb.search('', facets={'name': {}}), {'hits': 0, 'results': []}) results = self.sb.search('Index', facets={'name': {}}) self.assertEqual(results['hits'], 3) self.assertSetEqual( set(results['facets']['fields']['name']), {('daniel3', 1), ('daniel2', 1), ('daniel1', 1)} ) self.assertEqual(self.sb.search('', date_facets={ 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), 'gap_by': 'month', 'gap_amount': 1}}), {'hits': 0, 'results': []}) results = self.sb.search('Index', date_facets={ 'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), 'gap_by': 'month', 'gap_amount': 1}}) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['dates']['pub_date'], [(datetime.datetime(2009, 2, 1, 0, 0), 3)]) self.assertEqual(self.sb.search('', query_facets=[('name', '[* TO e]')]), {'hits': 0, 'results': []}) results = self.sb.search('Index', query_facets=[('name', '[* TO e]')]) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['queries'], {u'name': 3}) self.assertEqual(self.sb.search('', narrow_queries={'name:daniel1'}), {'hits': 0, 'results': []}) results = self.sb.search('Index', narrow_queries={'name:daniel1'}) 
self.assertEqual(results['hits'], 1) # Ensure that swapping the ``result_class`` works. self.assertTrue( isinstance(self.sb.search(u'index', result_class=MockSearchResult)['results'][0], MockSearchResult)) # Check the use of ``limit_to_registered_models``. self.assertEqual(self.sb.search('', limit_to_registered_models=False), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*', limit_to_registered_models=False)['hits'], 3) self.assertEqual( sorted([result.pk for result in self.sb.search('*:*', limit_to_registered_models=False)['results']]), ['1', '2', '3']) # Stow. old_limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*')['hits'], 3) self.assertEqual(sorted([result.pk for result in self.sb.search('*:*')['results']]), ['1', '2', '3']) # Restore. settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = old_limit_to_registered_models def test_spatial_search_parameters(self): p1 = Point(1.23, 4.56) kwargs = self.sb.build_search_kwargs('*:*', distance_point={'field': 'location', 'point': p1}, sort_by=(('distance', 'desc'),)) self.assertIn('sort', kwargs) self.assertEqual(1, len(kwargs['sort'])) geo_d = kwargs['sort'][0]['_geo_distance'] # ElasticSearch supports the GeoJSON-style lng, lat pairs so unlike Solr the values should be # in the same order as we used to create the Point(): # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-geo-distance-filter.html#_lat_lon_as_array_4 self.assertDictEqual(geo_d, {'location': [1.23, 4.56], 'unit': 'km', 'order': 'desc'}) def test_more_like_this(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) # A functional MLT example with enough data to work is below. Rely on # this to ensure the API is correct enough. 
self.assertEqual(self.sb.more_like_this(self.sample_objs[0])['hits'], 0) self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) def test_build_schema(self): old_ui = connections['elasticsearch'].get_unified_index() (content_field_name, mapping) = self.sb.build_schema(old_ui.all_searchfields()) self.assertEqual(content_field_name, 'text') self.assertEqual(len(mapping), 4 + 2) # +2 management fields self.assertEqual(mapping, { 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'text': {'type': 'string', 'analyzer': 'snowball'}, 'pub_date': {'type': 'date'}, 'name': {'type': 'string', 'analyzer': 'snowball'}, 'name_exact': {'index': 'not_analyzed', 'type': 'string'} }) ui = UnifiedIndex() ui.build(indexes=[Elasticsearch2ComplexFacetsMockSearchIndex()]) (content_field_name, mapping) = self.sb.build_schema(ui.all_searchfields()) self.assertEqual(content_field_name, 'text') self.assertEqual(len(mapping), 15 + 2) # +2 management fields self.assertEqual(mapping, { 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'name': {'type': 'string', 'analyzer': 'snowball'}, 'is_active_exact': {'type': 'boolean'}, 'created': {'type': 'date'}, 'post_count': {'type': 'long'}, 'created_exact': {'type': 'date'}, 'sites_exact': {'index': 'not_analyzed', 'type': 'string'}, 'is_active': {'type': 'boolean'}, 'sites': {'type': 'string', 'analyzer': 'snowball'}, 'post_count_i': {'type': 'long'}, 'average_rating': {'type': 'float'}, 'text': {'type': 'string', 'analyzer': 'snowball'}, 'pub_date_exact': {'type': 'date'}, 'name_exact': {'index': 'not_analyzed', 'type': 'string'}, 'pub_date': {'type': 'date'}, 'average_rating_exact': {'type': 'float'} }) def test_verify_type(self): old_ui = 
connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() smtmmi = Elasticsearch2MaintainTypeMockSearchIndex() ui.build(indexes=[smtmmi]) connections['elasticsearch']._index = ui sb = connections['elasticsearch'].get_backend() sb.update(smtmmi, self.sample_objs) self.assertEqual(sb.search('*:*')['hits'], 3) self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) connections['elasticsearch']._index = old_ui class CaptureHandler(std_logging.Handler): logs_seen = [] def emit(self, record): CaptureHandler.logs_seen.append(record) class FailedElasticsearch2SearchBackendTestCase(TestCase): def setUp(self): self.sample_objs = [] for i in range(1, 4): mock = MockModel() mock.id = i mock.author = 'daniel%s' % i mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) self.sample_objs.append(mock) # Stow. # Point the backend at a URL that doesn't exist so we can watch the # sparks fly. self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = "%s/foo/" % self.old_es_url self.cap = CaptureHandler() logging.getLogger('haystack').addHandler(self.cap) config = apps.get_app_config('haystack') logging.getLogger('haystack').removeHandler(config.stream) # Setup the rest of the bits. self.old_ui = connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = ui self.sb = connections['elasticsearch'].get_backend() def tearDown(self): # Restore. 
settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = self.old_es_url connections['elasticsearch']._index = self.old_ui config = apps.get_app_config('haystack') logging.getLogger('haystack').removeHandler(self.cap) logging.getLogger('haystack').addHandler(config.stream) @unittest.expectedFailure def test_all_cases(self): # Prior to the addition of the try/except bits, these would all fail miserably. self.assertEqual(len(CaptureHandler.logs_seen), 0) self.sb.update(self.smmi, self.sample_objs) self.assertEqual(len(CaptureHandler.logs_seen), 1) self.sb.remove(self.sample_objs[0]) self.assertEqual(len(CaptureHandler.logs_seen), 2) self.sb.search('search') self.assertEqual(len(CaptureHandler.logs_seen), 3) self.sb.more_like_this(self.sample_objs[0]) self.assertEqual(len(CaptureHandler.logs_seen), 4) self.sb.clear([MockModel]) self.assertEqual(len(CaptureHandler.logs_seen), 5) self.sb.clear() self.assertEqual(len(CaptureHandler.logs_seen), 6) class LiveElasticsearch2SearchQueryTestCase(TestCase): fixtures = ['base_data.json'] def setUp(self): super(LiveElasticsearch2SearchQueryTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() self.sq = connections['elasticsearch'].get_query() # Force indexing of the content. 
self.smmi.update(using='elasticsearch') def tearDown(self): connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SearchQueryTestCase, self).tearDown() def test_log_query(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=False): len(self.sq.get_results()) self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=True): # Redefine it to clear out the cached results. self.sq = connections['elasticsearch'].query(using='elasticsearch') self.sq.add_filter(SQ(name='bar')) len(self.sq.get_results()) self.assertEqual(len(connections['elasticsearch'].queries), 1) self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') # And again, for good measure. self.sq = connections['elasticsearch'].query('elasticsearch') self.sq.add_filter(SQ(name='bar')) self.sq.add_filter(SQ(text='moof')) len(self.sq.get_results()) self.assertEqual(len(connections['elasticsearch'].queries), 2) self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') self.assertEqual(connections['elasticsearch'].queries[1]['query_string'], u'(name:(bar) AND text:(moof))') lssqstc_all_loaded = None @override_settings(DEBUG=True) class LiveElasticsearch2SearchQuerySetTestCase(TestCase): """Used to test actual implementation details of the SearchQuerySet.""" fixtures = ['bulk_data.json'] def setUp(self): super(LiveElasticsearch2SearchQuerySetTestCase, self).setUp() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') self.rsqs = RelatedSearchQuerySet('elasticsearch') # Ugly but not constantly reindexing saves us almost 50% runtime. global lssqstc_all_loaded if lssqstc_all_loaded is None: lssqstc_all_loaded = True # Wipe it clean. 
clear_elasticsearch_index() # Force indexing of the content. self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SearchQuerySetTestCase, self).tearDown() def test_load_all(self): sqs = self.sqs.order_by('pub_date').load_all() self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertTrue(len(sqs) > 0) self.assertEqual(sqs[2].object.foo, u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.') def test_iter(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() results = sorted([int(result.pk) for result in sqs]) self.assertEqual(results, list(range(1, 24))) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_slice(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) self.assertEqual(len(connections['elasticsearch'].queries), 1) reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_values_slicing(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends # The values will come back as strings because Hasytack doesn't assume 
PKs are integers. # We'll prepare this set once since we're going to query the same results in multiple ways: expected_pks = [str(i) for i in [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]] results = self.sqs.all().order_by('pub_date').values('pk') self.assertListEqual([i['pk'] for i in results[1:11]], expected_pks) results = self.sqs.all().order_by('pub_date').values_list('pk') self.assertListEqual([i[0] for i in results[1:11]], expected_pks) results = self.sqs.all().order_by('pub_date').values_list('pk', flat=True) self.assertListEqual(results[1:11], expected_pks) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_count(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() self.assertEqual(sqs.count(), 23) self.assertEqual(sqs.count(), 23) self.assertEqual(len(sqs), 23) self.assertEqual(sqs.count(), 23) # Should only execute one query to count the length of the result set. self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_manual_iter(self): results = self.sqs.all() reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = set([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20}) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_fill_cache(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all() self.assertEqual(len(results._result_cache), 0) self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) 
self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_cache_is_full(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.sqs._cache_is_full(), False) results = self.sqs.all() fire_the_iterator_and_fill_cache = [result for result in results] self.assertEqual(results._cache_is_full(), True) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test___and__(self): sqs1 = self.sqs.filter(content='foo') sqs2 = self.sqs.filter(content='bar') sqs = sqs1 & sqs2 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) self.assertEqual(sqs.query.build_query(), u'((foo) AND (bar))') # Now for something more complex... sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) sqs4 = self.sqs.filter(content='bar') sqs = sqs3 & sqs4 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 3) self.assertEqual(sqs.query.build_query(), u'(NOT (title:(moof)) AND ((foo) OR (baz)) AND (bar))') def test___or__(self): sqs1 = self.sqs.filter(content='foo') sqs2 = self.sqs.filter(content='bar') sqs = sqs1 | sqs2 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) self.assertEqual(sqs.query.build_query(), u'((foo) OR (bar))') # Now for something more complex... sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) sqs4 = self.sqs.filter(content='bar').models(MockModel) sqs = sqs3 | sqs4 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) self.assertEqual(sqs.query.build_query(), u'((NOT (title:(moof)) AND ((foo) OR (baz))) OR (bar))') def test_auto_query(self): # Ensure bits in exact matches get escaped properly as well. # This will break horrifically if escaping isn't working. 
sqs = self.sqs.auto_query('"pants:rule"') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') self.assertEqual(sqs.query.build_query(), u'("pants\\:rule")') self.assertEqual(len(sqs), 0) # Regressions def test_regression_proper_start_offsets(self): sqs = self.sqs.filter(text='index') self.assertNotEqual(sqs.count(), 0) id_counts = {} for item in sqs: if item.id in id_counts: id_counts[item.id] += 1 else: id_counts[item.id] = 1 for key, value in id_counts.items(): if value > 1: self.fail("Result with id '%s' seen more than once in the results." % key) def test_regression_raw_search_breaks_slicing(self): sqs = self.sqs.raw_search('text:index') page_1 = [result.pk for result in sqs[0:10]] page_2 = [result.pk for result in sqs[10:20]] for pk in page_2: if pk in page_1: self.fail("Result with id '%s' seen more than once in the results." % pk) # RelatedSearchQuerySet Tests def test_related_load_all(self): sqs = self.rsqs.order_by('pub_date').load_all() self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertTrue(len(sqs) > 0) self.assertEqual(sqs[2].object.foo, u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. 
You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.') def test_related_load_all_queryset(self): sqs = self.rsqs.load_all().order_by('pub_date') self.assertEqual(len(sqs._load_all_querysets), 0) sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=1)) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs._load_all_querysets), 1) self.assertEqual(sorted([obj.object.id for obj in sqs]), list(range(2, 24))) sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10)) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs._load_all_querysets), 1) self.assertEqual(set([obj.object.id for obj in sqs]), {12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20}) self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), {21, 22, 23}) def test_related_iter(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.rsqs.all() results = set([int(result.pk) for result in sqs]) self.assertEqual(results, {2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20}) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_related_slice(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) self.assertEqual(len(connections['elasticsearch'].queries), 1) reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) self.assertEqual(len(connections['elasticsearch'].queries), 1) reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(set([int(result.pk) for result in results[20:30]]), 
{21, 22, 23}) self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_related_manual_iter(self): results = self.rsqs.all() reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = sorted([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, list(range(1, 24))) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_related_fill_cache(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all() self.assertEqual(len(results._result_cache), 0) self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_related_cache_is_full(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.rsqs._cache_is_full(), False) results = self.rsqs.all() fire_the_iterator_and_fill_cache = [result for result in results] self.assertEqual(results._cache_is_full(), True) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_quotes_regression(self): sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E") # Should not have empty terms. self.assertEqual(sqs.query.build_query(), u"(44\xb048'40''N 20\xb028'32''E)") # Should not cause Elasticsearch to 500. 
self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('blazing') self.assertEqual(sqs.query.build_query(), u'(blazing)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('blazing saddles') self.assertEqual(sqs.query.build_query(), u'(blazing saddles)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles') self.assertEqual(sqs.query.build_query(), u'(\\"blazing saddles)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles"') self.assertEqual(sqs.query.build_query(), u'("blazing saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing saddles"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'saddles"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'\'saddles"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \')') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \'\\")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles" mel') self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles" mel brooks') self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel brooks)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing saddles" brooks') self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" brooks)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing saddles" "brooks') self.assertEqual(sqs.query.build_query(), 
u'(mel "blazing saddles" \\"brooks)') self.assertEqual(sqs.count(), 0) def test_query_generation(self): sqs = self.sqs.filter(SQ(content=AutoQuery("hello world")) | SQ(title=AutoQuery("hello world"))) self.assertEqual(sqs.query.build_query(), u"((hello world) OR title:(hello world))") def test_result_class(self): # Assert that we're defaulting to ``SearchResult``. sqs = self.sqs.all() self.assertTrue(isinstance(sqs[0], SearchResult)) # Custom class. sqs = self.sqs.result_class(MockSearchResult).all() self.assertTrue(isinstance(sqs[0], MockSearchResult)) # Reset to default. sqs = self.sqs.result_class(None).all() self.assertTrue(isinstance(sqs[0], SearchResult)) @override_settings(DEBUG=True) class LiveElasticsearch2SpellingTestCase(TestCase): """Used to test actual implementation details of the SearchQuerySet.""" fixtures = ['bulk_data.json'] def setUp(self): super(LiveElasticsearch2SpellingTestCase, self).setUp() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockSpellingIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. self.sb = connections['elasticsearch'].get_backend() self.sb.setup() self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. 
connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2SpellingTestCase, self).tearDown() def test_spelling(self): self.assertEqual(self.sqs.auto_query('structurd').spelling_suggestion(), 'structured') self.assertEqual(self.sqs.spelling_suggestion('structurd'), 'structured') self.assertEqual(self.sqs.auto_query('srchindex instanc').spelling_suggestion(), 'searchindex instance') self.assertEqual(self.sqs.spelling_suggestion('srchindex instanc'), 'searchindex instance') class LiveElasticsearch2MoreLikeThisTestCase(TestCase): fixtures = ['bulk_data.json'] def setUp(self): super(LiveElasticsearch2MoreLikeThisTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockModelSearchIndex() self.sammi = Elasticsearch2AnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') self.smmi.update(using='elasticsearch') self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. 
connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2MoreLikeThisTestCase, self).tearDown() def test_more_like_this(self): mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1)) results = [result.pk for result in mlt] self.assertEqual(mlt.count(), 11) self.assertEqual(set(results), {u'10', u'5', u'2', u'21', u'4', u'6', u'23', u'9', u'14'}) self.assertEqual(len(results), 10) alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=2)) results = [result.pk for result in alt_mlt] self.assertEqual(alt_mlt.count(), 9) self.assertEqual(set(results), {u'2', u'16', u'3', u'19', u'4', u'17', u'10', u'22', u'23'}) self.assertEqual(len(results), 9) alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1)) results = [result.pk for result in alt_mlt_with_models] self.assertEqual(alt_mlt_with_models.count(), 10) self.assertEqual(set(results), {u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'}) self.assertEqual(len(results), 10) if hasattr(MockModel.objects, 'defer'): # Make sure MLT works with deferred bits. qs = MockModel.objects.defer('foo') self.assertEqual(qs.query.deferred_loading[1], True) deferred = self.sqs.models(MockModel).more_like_this(qs.get(pk=1)) self.assertEqual(deferred.count(), 10) self.assertEqual({result.pk for result in deferred}, {u'10', u'5', u'21', u'2', u'4', u'6', u'23', u'9', u'14', u'16'}) self.assertEqual(len([result.pk for result in deferred]), 10) # Ensure that swapping the ``result_class`` works. self.assertTrue( isinstance(self.sqs.result_class(MockSearchResult).more_like_this(MockModel.objects.get(pk=1))[0], MockSearchResult)) class LiveElasticsearch2AutocompleteTestCase(TestCase): fixtures = ['bulk_data.json'] def setUp(self): super(LiveElasticsearch2AutocompleteTestCase, self).setUp() # Stow. 
self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2AutocompleteMockModelSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. self.sb = connections['elasticsearch'].get_backend() self.sb.setup() self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2AutocompleteTestCase, self).tearDown() def test_build_schema(self): self.sb = connections['elasticsearch'].get_backend() content_name, mapping = self.sb.build_schema(self.ui.all_searchfields()) self.assertEqual(mapping, { 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'name_auto': { 'type': 'string', 'analyzer': 'edgengram_analyzer', }, 'text': { 'type': 'string', 'analyzer': 'snowball', }, 'pub_date': { 'type': 'date' }, 'name': { 'type': 'string', 'analyzer': 'snowball', }, 'text_auto': { 'type': 'string', 'analyzer': 'edgengram_analyzer', } }) def test_autocomplete(self): autocomplete = self.sqs.autocomplete(text_auto='mod') self.assertEqual(autocomplete.count(), 16) self.assertEqual(set([result.pk for result in autocomplete]), {'1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'}) self.assertTrue('mod' in autocomplete[0].text.lower()) self.assertTrue('mod' in autocomplete[1].text.lower()) self.assertTrue('mod' in autocomplete[2].text.lower()) self.assertTrue('mod' in autocomplete[3].text.lower()) self.assertTrue('mod' in autocomplete[4].text.lower()) self.assertEqual(len([result.pk for result in autocomplete]), 16) # Test multiple words. 
autocomplete_2 = self.sqs.autocomplete(text_auto='your mod') self.assertEqual(autocomplete_2.count(), 13) self.assertEqual(set([result.pk for result in autocomplete_2]), {'1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'}) map_results = {result.pk: result for result in autocomplete_2} self.assertTrue('your' in map_results['1'].text.lower()) self.assertTrue('mod' in map_results['1'].text.lower()) self.assertTrue('your' in map_results['6'].text.lower()) self.assertTrue('mod' in map_results['6'].text.lower()) self.assertTrue('your' in map_results['2'].text.lower()) self.assertEqual(len([result.pk for result in autocomplete_2]), 13) # Test multiple fields. autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan') self.assertEqual(autocomplete_3.count(), 4) self.assertEqual(set([result.pk for result in autocomplete_3]), {'12', '1', '22', '14'}) self.assertEqual(len([result.pk for result in autocomplete_3]), 4) # Test numbers in phrases autocomplete_4 = self.sqs.autocomplete(text_auto='Jen 867') self.assertEqual(autocomplete_4.count(), 1) self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'}) # Test numbers alone autocomplete_4 = self.sqs.autocomplete(text_auto='867') self.assertEqual(autocomplete_4.count(), 1) self.assertEqual(set([result.pk for result in autocomplete_4]), {'20'}) class LiveElasticsearch2RoundTripTestCase(TestCase): def setUp(self): super(LiveElasticsearch2RoundTripTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.srtsi = Elasticsearch2RoundTripSearchIndex() self.ui.build(indexes=[self.srtsi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() self.sqs = SearchQuerySet('elasticsearch') # Fake indexing. mock = MockModel() mock.id = 1 self.sb.update(self.srtsi, [mock]) def tearDown(self): # Restore. 
connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2RoundTripTestCase, self).tearDown() def test_round_trip(self): results = self.sqs.filter(id='core.mockmodel.1') # Sanity check. self.assertEqual(results.count(), 1) # Check the individual fields. result = results[0] self.assertEqual(result.id, 'core.mockmodel.1') self.assertEqual(result.text, 'This is some example text.') self.assertEqual(result.name, 'Mister Pants') self.assertEqual(result.is_active, True) self.assertEqual(result.post_count, 25) self.assertEqual(result.average_rating, 3.6) self.assertEqual(result.price, u'24.99') self.assertEqual(result.pub_date, datetime.date(2009, 11, 21)) self.assertEqual(result.created, datetime.datetime(2009, 11, 21, 21, 31, 00)) self.assertEqual(result.tags, ['staff', 'outdoor', 'activist', 'scientist']) self.assertEqual(result.sites, [3, 5, 1]) @unittest.skipUnless(test_pickling, 'Skipping pickling tests') class LiveElasticsearch2PickleTestCase(TestCase): fixtures = ['bulk_data.json'] def setUp(self): super(LiveElasticsearch2PickleTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2MockModelSearchIndex() self.sammi = Elasticsearch2AnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') self.smmi.update(using='elasticsearch') self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. connections['elasticsearch']._index = self.old_ui super(LiveElasticsearch2PickleTestCase, self).tearDown() def test_pickling(self): results = self.sqs.all() for res in results: # Make sure the cache is full. 
pass in_a_pickle = pickle.dumps(results) like_a_cuke = pickle.loads(in_a_pickle) self.assertEqual(len(like_a_cuke), len(results)) self.assertEqual(like_a_cuke[0].id, results[0].id) class Elasticsearch2BoostBackendTestCase(TestCase): def setUp(self): super(Elasticsearch2BoostBackendTestCase, self).setUp() # Wipe it clean. self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2BoostMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() self.sample_objs = [] for i in range(1, 5): mock = AFourthMockModel() mock.id = i if i % 2: mock.author = 'daniel' mock.editor = 'david' else: mock.author = 'david' mock.editor = 'daniel' mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) self.sample_objs.append(mock) def tearDown(self): connections['elasticsearch']._index = self.old_ui super(Elasticsearch2BoostBackendTestCase, self).tearDown() def raw_search(self, query): return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) def test_boost(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 4) results = SearchQuerySet(using='elasticsearch').filter(SQ(author='daniel') | SQ(editor='daniel')) self.assertEqual(set([result.id for result in results]), {'core.afourthmockmodel.4', 'core.afourthmockmodel.3', 'core.afourthmockmodel.1', 'core.afourthmockmodel.2'}) def test__to_python(self): self.assertEqual(self.sb._to_python('abc'), 'abc') self.assertEqual(self.sb._to_python('1'), 1) self.assertEqual(self.sb._to_python('2653'), 2653) self.assertEqual(self.sb._to_python('25.5'), 25.5) self.assertEqual(self.sb._to_python('[1, 2, 3]'), [1, 2, 3]) self.assertEqual(self.sb._to_python('{"a": 
1, "b": 2, "c": 3}'), {'a': 1, 'c': 3, 'b': 2}) self.assertEqual(self.sb._to_python('2009-05-09T16:14:00'), datetime.datetime(2009, 5, 9, 16, 14)) self.assertEqual(self.sb._to_python('2009-05-09T00:00:00'), datetime.datetime(2009, 5, 9, 0, 0)) self.assertEqual(self.sb._to_python(None), None) class RecreateIndexTestCase(TestCase): def setUp(self): self.raw_es = elasticsearch.Elasticsearch( settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) def test_recreate_index(self): clear_elasticsearch_index() sb = connections['elasticsearch'].get_backend() sb.silently_fail = True sb.setup() original_mapping = self.raw_es.indices.get_mapping(index=sb.index_name) sb.clear() sb.setup() try: updated_mapping = self.raw_es.indices.get_mapping(sb.index_name) except elasticsearch.NotFoundError: self.fail("There is no mapping after recreating the index") self.assertEqual(original_mapping, updated_mapping, "Mapping after recreating the index differs from the original one") class Elasticsearch2FacetingTestCase(TestCase): def setUp(self): super(Elasticsearch2FacetingTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = Elasticsearch2FacetingMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. 
self.sb.existing_mapping = {} self.sb.setup() self.sample_objs = [] for i in range(1, 10): mock = AFourthMockModel() mock.id = i if i > 5: mock.editor = 'George Taylor' else: mock.editor = 'Perry White' if i % 2: mock.author = 'Daniel Lindsley' else: mock.author = 'Dan Watson' mock.pub_date = datetime.date(2013, 9, (i % 4) + 1) self.sample_objs.append(mock) def tearDown(self): connections['elasticsearch']._index = self.old_ui super(Elasticsearch2FacetingTestCase, self).tearDown() def test_facet(self): self.sb.update(self.smmi, self.sample_objs) counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 5), ('Dan Watson', 4), ]) self.assertEqual(counts['fields']['editor'], [ ('Perry White', 5), ('George Taylor', 4), ]) counts = SearchQuerySet('elasticsearch').filter(content='white').facet('facet_field', order='reverse_count').facet_counts() self.assertEqual(counts['fields']['facet_field'], [ ('Dan Watson', 2), ('Daniel Lindsley', 3), ]) def test_multiple_narrow(self): self.sb.update(self.smmi, self.sample_objs) counts = SearchQuerySet('elasticsearch').narrow('editor_exact:"Perry White"').narrow( 'author_exact:"Daniel Lindsley"').facet('author').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), ]) def test_narrow(self): self.sb.update(self.smmi, self.sample_objs) counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').narrow( 'editor_exact:"Perry White"').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), ('Dan Watson', 2), ]) self.assertEqual(counts['fields']['editor'], [ ('Perry White', 5), ]) def test_date_facet(self): self.sb.update(self.smmi, self.sample_objs) start = datetime.date(2013, 9, 1) end = datetime.date(2013, 9, 30) # Facet by day counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, gap_by='day').facet_counts() 
self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 2), (datetime.datetime(2013, 9, 2), 3), (datetime.datetime(2013, 9, 3), 2), (datetime.datetime(2013, 9, 4), 2), ]) # By month counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, gap_by='month').facet_counts() self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 9), ]) django-haystack-2.8.0/test_haystack/elasticsearch2_tests/test_inputs.py000066400000000000000000000070471325051407000265140ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from haystack import connections, inputs class Elasticsearch2InputTestCase(TestCase): def setUp(self): super(Elasticsearch2InputTestCase, self).setUp() self.query_obj = connections['elasticsearch'].get_query() def test_raw_init(self): raw = inputs.Raw('hello OR there, :you') self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {}) self.assertEqual(raw.post_process, False) raw = inputs.Raw('hello OR there, :you', test='really') self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {'test': 'really'}) self.assertEqual(raw.post_process, False) def test_raw_prepare(self): raw = inputs.Raw('hello OR there, :you') self.assertEqual(raw.prepare(self.query_obj), 'hello OR there, :you') def test_clean_init(self): clean = inputs.Clean('hello OR there, :you') self.assertEqual(clean.query_string, 'hello OR there, :you') self.assertEqual(clean.post_process, True) def test_clean_prepare(self): clean = inputs.Clean('hello OR there, :you') self.assertEqual(clean.prepare(self.query_obj), 'hello or there, \\:you') def test_exact_init(self): exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.query_string, 'hello OR there, :you') self.assertEqual(exact.post_process, True) def test_exact_prepare(self): 
exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"') exact = inputs.Exact('hello OR there, :you', clean=True) self.assertEqual(exact.prepare(self.query_obj), u'"hello or there, \\:you"') def test_not_init(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.query_string, 'hello OR there, :you') self.assertEqual(not_it.post_process, True) def test_not_prepare(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.prepare(self.query_obj), u'NOT (hello or there, \\:you)') def test_autoquery_init(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.query_string, 'panic -don\'t "froody dude"') self.assertEqual(autoquery.post_process, False) def test_autoquery_prepare(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.prepare(self.query_obj), u'panic NOT don\'t "froody dude"') def test_altparser_init(self): altparser = inputs.AltParser('dismax') self.assertEqual(altparser.parser_name, 'dismax') self.assertEqual(altparser.query_string, '') self.assertEqual(altparser.kwargs, {}) self.assertEqual(altparser.post_process, False) altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) self.assertEqual(altparser.parser_name, 'dismax') self.assertEqual(altparser.query_string, 'douglas adams') self.assertEqual(altparser.kwargs, {'mm': 1, 'qf': 'author'}) self.assertEqual(altparser.post_process, False) def test_altparser_prepare(self): altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) self.assertEqual(altparser.prepare(self.query_obj), u"""{!dismax mm=1 qf=author v='douglas adams'}""") django-haystack-2.8.0/test_haystack/elasticsearch2_tests/test_query.py000066400000000000000000000230151325051407000263300ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals import 
datetime import elasticsearch from django.test import TestCase from haystack import connections from haystack.inputs import Exact from haystack.models import SearchResult from haystack.query import SearchQuerySet, SQ from haystack.utils.geo import D, Point from ..core.models import AnotherMockModel, MockModel class Elasticsearch2SearchQueryTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchQueryTestCase, self).setUp() self.sq = connections['elasticsearch'].get_query() def test_build_query_all(self): self.assertEqual(self.sq.build_query(), '*:*') def test_build_query_single_word(self): self.sq.add_filter(SQ(content='hello')) self.assertEqual(self.sq.build_query(), '(hello)') def test_build_query_boolean(self): self.sq.add_filter(SQ(content=True)) self.assertEqual(self.sq.build_query(), '(True)') def test_regression_slash_search(self): self.sq.add_filter(SQ(content='hello/')) self.assertEqual(self.sq.build_query(), '(hello\\/)') def test_build_query_datetime(self): self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28))) self.assertEqual(self.sq.build_query(), '(2009-05-08T11:28:00)') def test_build_query_multiple_words_and(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_filter(SQ(content='world')) self.assertEqual(self.sq.build_query(), '((hello) AND (world))') def test_build_query_multiple_words_not(self): self.sq.add_filter(~SQ(content='hello')) self.sq.add_filter(~SQ(content='world')) self.assertEqual(self.sq.build_query(), '(NOT ((hello)) AND NOT ((world)))') def test_build_query_multiple_words_or(self): self.sq.add_filter(~SQ(content='hello')) self.sq.add_filter(SQ(content='hello'), use_or=True) self.assertEqual(self.sq.build_query(), '(NOT ((hello)) OR (hello))') def test_build_query_multiple_words_mixed(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(content='hello'), use_or=True) self.sq.add_filter(~SQ(content='world')) self.assertEqual(self.sq.build_query(), u'(((why) OR (hello)) AND NOT ((world)))') 
def test_build_query_phrase(self): self.sq.add_filter(SQ(content='hello world')) self.assertEqual(self.sq.build_query(), '(hello AND world)') self.sq.add_filter(SQ(content__exact='hello world')) self.assertEqual(self.sq.build_query(), u'((hello AND world) AND ("hello world"))') def test_build_query_boost(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_boost('world', 5) self.assertEqual(self.sq.build_query(), "(hello) world^5") def test_build_query_multiple_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__lte=Exact('2009-02-10 01:59:00'))) self.sq.add_filter(SQ(author__gt='daniel')) self.sq.add_filter(SQ(created__lt=Exact('2009-02-12 12:13:00'))) self.sq.add_filter(SQ(title__gte='B')) self.sq.add_filter(SQ(id__in=[1, 2, 3])) self.sq.add_filter(SQ(rating__range=[3, 5])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:([* TO "2009-02-10 01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12 12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') def test_build_query_multiple_filter_types_with_datetimes(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0))) self.sq.add_filter(SQ(author__gt='daniel')) self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0))) self.sq.add_filter(SQ(title__gte='B')) self.sq.add_filter(SQ(id__in=[1, 2, 3])) self.sq.add_filter(SQ(rating__range=[3, 5])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:([* TO "2009-02-10T01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12T12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') def test_build_query_in_filter_multiple_words(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"])) self.assertEqual(self.sq.build_query(), u'((why) AND title:("A Famous Paper" OR "An 
Infamous Article"))') def test_build_query_in_filter_datetime(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:("2009-07-06T01:56:21"))') def test_build_query_in_with_set(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in={"A Famous Paper", "An Infamous Article"})) self.assertTrue('((why) AND title:(' in self.sq.build_query()) self.assertTrue('"A Famous Paper"' in self.sq.build_query()) self.assertTrue('"An Infamous Article"' in self.sq.build_query()) def test_build_query_wildcard_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__startswith='haystack')) self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack*))') def test_build_query_fuzzy_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__fuzzy='haystack')) self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack~))') def test_clean(self): self.assertEqual(self.sq.clean('hello world'), 'hello world') self.assertEqual(self.sq.clean('hello AND world'), 'hello and world') self.assertEqual(self.sq.clean('hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \ / world'), 'hello and or not to \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ \\/ world') self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), 'so please NOTe i am in a bAND and bORed') def test_build_query_with_models(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_model(MockModel) self.assertEqual(self.sq.build_query(), '(hello)') self.sq.add_model(AnotherMockModel) self.assertEqual(self.sq.build_query(), u'(hello)') def test_set_result_class(self): # Assert that we're defaulting to ``SearchResult``. self.assertTrue(issubclass(self.sq.result_class, SearchResult)) # Custom class. 
class IttyBittyResult(object): pass self.sq.set_result_class(IttyBittyResult) self.assertTrue(issubclass(self.sq.result_class, IttyBittyResult)) # Reset to default. self.sq.set_result_class(None) self.assertTrue(issubclass(self.sq.result_class, SearchResult)) def test_in_filter_values_list(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=[1, 2, 3])) self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))') def test_narrow_sq(self): sqs = SearchQuerySet(using='elasticsearch').narrow(SQ(foo='moof')) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.narrow_queries), 1) self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') class Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchQuerySpatialBeforeReleaseTestCase, self).setUp() self.backend = connections['elasticsearch'].get_backend() self._elasticsearch_version = elasticsearch.VERSION elasticsearch.VERSION = (0, 9, 9) def tearDown(self): elasticsearch.VERSION = self._elasticsearch_version def test_build_query_with_dwithin_range(self): """ Test build_search_kwargs with dwithin range for Elasticsearch versions < 1.0.0 """ search_kwargs = self.backend.build_search_kwargs('where', dwithin={ 'field': "location_field", 'point': Point(1.2345678, 2.3456789), 'distance': D(m=500) }) self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], {'distance': 0.5, 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) class Elasticsearch2SearchQuerySpatialAfterReleaseTestCase(TestCase): def setUp(self): super(Elasticsearch2SearchQuerySpatialAfterReleaseTestCase, self).setUp() self.backend = connections['elasticsearch'].get_backend() self._elasticsearch_version = elasticsearch.VERSION elasticsearch.VERSION = (1, 0, 0) def tearDown(self): elasticsearch.VERSION = self._elasticsearch_version def test_build_query_with_dwithin_range(self): """ Test 
build_search_kwargs with dwithin range for Elasticsearch versions >= 1.0.0 """ search_kwargs = self.backend.build_search_kwargs('where', dwithin={ 'field': "location_field", 'point': Point(1.2345678, 2.3456789), 'distance': D(m=500) }) self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], {'distance': "0.500000km", 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) django-haystack-2.8.0/test_haystack/elasticsearch_tests/000077500000000000000000000000001325051407000234675ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/elasticsearch_tests/__init__.py000066400000000000000000000021021325051407000255730ustar00rootroot00000000000000# encoding: utf-8 import unittest import warnings from django.conf import settings from haystack.utils import log as logging warnings.simplefilter('ignore', Warning) def setup(): log = logging.getLogger('haystack') try: import elasticsearch if not ((1, 0, 0) <= elasticsearch.__version__ < (2, 0, 0)): raise ImportError from elasticsearch import Elasticsearch, ElasticsearchException except ImportError: log.error("Skipping ElasticSearch 1 tests: 'elasticsearch>=1.0.0,<2.0.0' not installed.") raise unittest.SkipTest("'elasticsearch>=1.0.0,<2.0.0' not installed.") es = Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) try: es.info() except ElasticsearchException as e: log.error("elasticsearch not running on %r" % \ settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], exc_info=True) raise unittest.SkipTest("elasticsearch not running on %r" % \ settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], e) django-haystack-2.8.0/test_haystack/elasticsearch_tests/test_elasticsearch_backend.py000066400000000000000000001750161325051407000313730ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals import datetime import logging as std_logging import operator import unittest from contextlib 
import contextmanager from decimal import Decimal import elasticsearch from django.apps import apps from django.conf import settings from django.test import TestCase from django.test.utils import override_settings from haystack import connections, indexes, reset_search_queries from haystack.exceptions import SkipDocument from haystack.inputs import AutoQuery from haystack.models import SearchResult from haystack.query import SQ, RelatedSearchQuerySet, SearchQuerySet from haystack.utils import log as logging from haystack.utils.geo import Point from haystack.utils.loading import UnifiedIndex from ..core.models import AFourthMockModel, AnotherMockModel, ASixthMockModel, MockModel from ..mocks import MockSearchResult test_pickling = True try: import cPickle as pickle except ImportError: try: import pickle except ImportError: test_pickling = False def clear_elasticsearch_index(): # Wipe it clean. raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) try: raw_es.indices.delete(index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) raw_es.indices.refresh() except elasticsearch.TransportError: pass # Since we've just completely deleted the index, we'll reset setup_complete so the next access will # correctly define the mappings: connections['elasticsearch'].get_backend().setup_complete = False class ElasticsearchMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) name = indexes.CharField(model_attr='author', faceted=True) pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class ElasticsearchMockSearchIndexWithSkipDocument(ElasticsearchMockSearchIndex): def prepare_text(self, obj): if obj.author == 'daniel3': raise SkipDocument return u"Indexed!\n%s" % obj.id class ElasticsearchMockSpellingIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) name = 
indexes.CharField(model_attr='author', faceted=True) pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel def prepare_text(self, obj): return obj.foo class ElasticsearchMaintainTypeMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) month = indexes.CharField(indexed=False) pub_date = indexes.DateTimeField(model_attr='pub_date') def prepare_month(self, obj): return "%02d" % obj.pub_date.month def get_model(self): return MockModel class ElasticsearchMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='foo', document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class ElasticsearchAnotherMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return AnotherMockModel def prepare_text(self, obj): return u"You might be searching for the user %s" % obj.author class ElasticsearchBoostMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField( document=True, use_template=True, template_name='search/indexes/core/mockmodel_template.txt' ) author = indexes.CharField(model_attr='author', weight=2.0) editor = indexes.CharField(model_attr='editor') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return AFourthMockModel def prepare(self, obj): data = super(ElasticsearchBoostMockSearchIndex, self).prepare(obj) if obj.pk == 4: data['boost'] = 5.0 return data class ElasticsearchFacetingMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) author = indexes.CharField(model_attr='author', faceted=True) editor = indexes.CharField(model_attr='editor', faceted=True) pub_date = 
indexes.DateField(model_attr='pub_date', faceted=True) facet_field = indexes.FacetCharField(model_attr='author') def prepare_text(self, obj): return '%s %s' % (obj.author, obj.editor) def get_model(self): return AFourthMockModel class ElasticsearchRoundTripSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, default='') name = indexes.CharField() is_active = indexes.BooleanField() post_count = indexes.IntegerField() average_rating = indexes.FloatField() price = indexes.DecimalField() pub_date = indexes.DateField() created = indexes.DateTimeField() tags = indexes.MultiValueField() sites = indexes.MultiValueField() def get_model(self): return MockModel def prepare(self, obj): prepped = super(ElasticsearchRoundTripSearchIndex, self).prepare(obj) prepped.update({ 'text': 'This is some example text.', 'name': 'Mister Pants', 'is_active': True, 'post_count': 25, 'average_rating': 3.6, 'price': Decimal('24.99'), 'pub_date': datetime.date(2009, 11, 21), 'created': datetime.datetime(2009, 11, 21, 21, 31, 00), 'tags': ['staff', 'outdoor', 'activist', 'scientist'], 'sites': [3, 5, 1], }) return prepped class ElasticsearchComplexFacetsMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, default='') name = indexes.CharField(faceted=True) is_active = indexes.BooleanField(faceted=True) post_count = indexes.IntegerField() post_count_i = indexes.FacetIntegerField(facet_for='post_count') average_rating = indexes.FloatField(faceted=True) pub_date = indexes.DateField(faceted=True) created = indexes.DateTimeField(faceted=True) sites = indexes.MultiValueField(faceted=True) def get_model(self): return MockModel class ElasticsearchAutocompleteMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='foo', document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') text_auto = 
indexes.EdgeNgramField(model_attr='foo') name_auto = indexes.EdgeNgramField(model_attr='author') def get_model(self): return MockModel class ElasticsearchSpatialSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='name', document=True) location = indexes.LocationField() def prepare_location(self, obj): return "%s,%s" % (obj.lat, obj.lon) def get_model(self): return ASixthMockModel class TestSettings(TestCase): def test_kwargs_are_passed_on(self): from haystack.backends.elasticsearch_backend import ElasticsearchSearchBackend backend = ElasticsearchSearchBackend('alias', **{ 'URL': settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'], 'INDEX_NAME': 'testing', 'KWARGS': {'max_retries': 42} }) self.assertEqual(backend.conn.transport.max_retries, 42) class ElasticSearchMockUnifiedIndex(UnifiedIndex): spy_args = None def get_index(self, model_klass): if self.spy_args is not None: self.spy_args.setdefault('get_index', []).append(model_klass) return super(ElasticSearchMockUnifiedIndex, self).get_index(model_klass) @contextmanager def spy(self): try: self.spy_args = {} yield self.spy_args finally: self.spy_args = None class ElasticsearchSearchBackendTestCase(TestCase): def setUp(self): super(ElasticsearchSearchBackendTestCase, self).setUp() # Wipe it clean. self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = ElasticSearchMockUnifiedIndex() self.smmi = ElasticsearchMockSearchIndex() self.smmidni = ElasticsearchMockSearchIndexWithSkipDocument() self.smtmmi = ElasticsearchMaintainTypeMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. 
self.sb.existing_mapping = {} self.sb.setup() self.sample_objs = [] for i in range(1, 4): mock = MockModel() mock.id = i mock.author = 'daniel%s' % i mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) self.sample_objs.append(mock) def tearDown(self): connections['elasticsearch']._index = self.old_ui super(ElasticsearchSearchBackendTestCase, self).tearDown() self.sb.silently_fail = True def raw_search(self, query): try: return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) except elasticsearch.TransportError: return {} def test_non_silent(self): bad_sb = connections['elasticsearch'].backend('bad', URL='http://omg.wtf.bbq:1000/', INDEX_NAME='whatver', SILENTLY_FAIL=False, TIMEOUT=1) try: bad_sb.update(self.smmi, self.sample_objs) self.fail() except: pass try: bad_sb.remove('core.mockmodel.1') self.fail() except: pass try: bad_sb.clear() self.fail() except: pass try: bad_sb.search('foo') self.fail() except: pass def test_update_no_documents(self): url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] index_name = settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME'] sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=True) self.assertEqual(sb.update(self.smmi, []), None) sb = connections['elasticsearch'].backend('elasticsearch', URL=url, INDEX_NAME=index_name, SILENTLY_FAIL=False) try: sb.update(self.smmi, []) self.fail() except: pass def test_update(self): self.sb.update(self.smmi, self.sample_objs) # Check what Elasticsearch thinks is there. 
self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) self.assertEqual(sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], key=lambda x: x['id']), [ { 'django_id': '1', 'django_ct': 'core.mockmodel', 'name': 'daniel1', 'name_exact': 'daniel1', 'text': 'Indexed!\n1', 'pub_date': '2009-02-24T00:00:00', 'id': 'core.mockmodel.1' }, { 'django_id': '2', 'django_ct': 'core.mockmodel', 'name': 'daniel2', 'name_exact': 'daniel2', 'text': 'Indexed!\n2', 'pub_date': '2009-02-23T00:00:00', 'id': 'core.mockmodel.2' }, { 'django_id': '3', 'django_ct': 'core.mockmodel', 'name': 'daniel3', 'name_exact': 'daniel3', 'text': 'Indexed!\n3', 'pub_date': '2009-02-22T00:00:00', 'id': 'core.mockmodel.3' } ]) def test_update_with_SkipDocument_raised(self): self.sb.update(self.smmidni, self.sample_objs) # Check what Elasticsearch thinks is there. res = self.raw_search('*:*')['hits'] self.assertEqual(res['total'], 2) self.assertListEqual( sorted([x['_source']['id'] for x in res['hits']]), ['core.mockmodel.1', 'core.mockmodel.2'] ) def test_remove(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) self.sb.remove(self.sample_objs[0]) self.assertEqual(self.raw_search('*:*')['hits']['total'], 2) self.assertEqual(sorted([res['_source'] for res in self.raw_search('*:*')['hits']['hits']], key=operator.itemgetter('django_id')), [ { 'django_id': '2', 'django_ct': 'core.mockmodel', 'name': 'daniel2', 'name_exact': 'daniel2', 'text': 'Indexed!\n2', 'pub_date': '2009-02-23T00:00:00', 'id': 'core.mockmodel.2' }, { 'django_id': '3', 'django_ct': 'core.mockmodel', 'name': 'daniel3', 'name_exact': 'daniel3', 'text': 'Indexed!\n3', 'pub_date': '2009-02-22T00:00:00', 'id': 'core.mockmodel.3' } ]) def test_remove_succeeds_on_404(self): self.sb.silently_fail = False self.sb.remove('core.mockmodel.421') def test_clear(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*').get('hits', 
{}).get('total', 0), 3) self.sb.clear() self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) self.sb.clear([AnotherMockModel]) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) self.sb.clear([MockModel]) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 3) self.sb.clear([AnotherMockModel, MockModel]) self.assertEqual(self.raw_search('*:*').get('hits', {}).get('total', 0), 0) def test_results_ask_for_index_per_entry(self): # Test that index class is obtained per result entry, not per every entry field self.sb.update(self.smmi, self.sample_objs) with self.ui.spy() as spy: self.sb.search('*:*', limit_to_registered_models=False) self.assertEqual(len(spy.get('get_index', [])), len(self.sample_objs)) def test_search(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*')['hits'], 3) self.assertEqual(set([result.pk for result in self.sb.search('*:*')['results']]), set([u'2', u'1', u'3'])) self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) self.assertEqual(sorted([result.highlighted[0] for result in self.sb.search('Index', highlight=True)['results']]), [u'Indexed!\n1', u'Indexed!\n2', u'Indexed!\n3']) self.assertEqual(sorted([result.highlighted[0] for result in self.sb.search('Index', highlight={'pre_tags': [''],'post_tags': ['']})['results']]), [u'Indexed!\n1', u'Indexed!\n2', u'Indexed!\n3']) self.assertEqual(self.sb.search('Indx')['hits'], 0) self.assertEqual(self.sb.search('indaxed')['spelling_suggestion'], 
'indexed') self.assertEqual(self.sb.search('arf', spelling_query='indexyd')['spelling_suggestion'], 'indexed') self.assertEqual(self.sb.search('', facets={'name': {}}), {'hits': 0, 'results': []}) results = self.sb.search('Index', facets={'name': {}}) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['fields']['name'], [('daniel3', 1), ('daniel2', 1), ('daniel1', 1)]) self.assertEqual(self.sb.search('', date_facets={'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), 'gap_by': 'month', 'gap_amount': 1}}), {'hits': 0, 'results': []}) results = self.sb.search('Index', date_facets={'pub_date': {'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 4, 1), 'gap_by': 'month', 'gap_amount': 1}}) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['dates']['pub_date'], [(datetime.datetime(2009, 2, 1, 0, 0), 3)]) self.assertEqual(self.sb.search('', query_facets=[('name', '[* TO e]')]), {'hits': 0, 'results': []}) results = self.sb.search('Index', query_facets=[('name', '[* TO e]')]) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['queries'], {u'name': 3}) self.assertEqual(self.sb.search('', narrow_queries=set(['name:daniel1'])), {'hits': 0, 'results': []}) results = self.sb.search('Index', narrow_queries=set(['name:daniel1'])) self.assertEqual(results['hits'], 1) # Ensure that swapping the ``result_class`` works. self.assertTrue(isinstance(self.sb.search(u'index', result_class=MockSearchResult)['results'][0], MockSearchResult)) # Check the use of ``limit_to_registered_models``. self.assertEqual(self.sb.search('', limit_to_registered_models=False), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*', limit_to_registered_models=False)['hits'], 3) self.assertEqual(sorted([result.pk for result in self.sb.search('*:*', limit_to_registered_models=False)['results']]), ['1', '2', '3']) # Stow. 
old_limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*')['hits'], 3) self.assertEqual(sorted([result.pk for result in self.sb.search('*:*')['results']]), ['1', '2', '3']) # Restore. settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = old_limit_to_registered_models def test_spatial_search_parameters(self): p1 = Point(1.23, 4.56) kwargs = self.sb.build_search_kwargs('*:*', distance_point={'field': 'location', 'point': p1}, sort_by=(('distance', 'desc'), )) self.assertIn('sort', kwargs) self.assertEqual(1, len(kwargs['sort'])) geo_d = kwargs['sort'][0]['_geo_distance'] # ElasticSearch supports the GeoJSON-style lng, lat pairs so unlike Solr the values should be # in the same order as we used to create the Point(): # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-geo-distance-filter.html#_lat_lon_as_array_4 self.assertDictEqual(geo_d, {'location': [1.23, 4.56], 'unit': 'km', 'order': 'desc'}) def test_more_like_this(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 3) # A functional MLT example with enough data to work is below. Rely on # this to ensure the API is correct enough. 
self.assertEqual(self.sb.more_like_this(self.sample_objs[0])['hits'], 0) self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) def test_build_schema(self): old_ui = connections['elasticsearch'].get_unified_index() (content_field_name, mapping) = self.sb.build_schema(old_ui.all_searchfields()) self.assertEqual(content_field_name, 'text') self.assertEqual(len(mapping), 4 + 2) # +2 management fields self.assertEqual(mapping, { 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'text': {'type': 'string', 'analyzer': 'snowball'}, 'pub_date': {'type': 'date'}, 'name': {'type': 'string', 'analyzer': 'snowball'}, 'name_exact': {'index': 'not_analyzed', 'type': 'string'} }) ui = UnifiedIndex() ui.build(indexes=[ElasticsearchComplexFacetsMockSearchIndex()]) (content_field_name, mapping) = self.sb.build_schema(ui.all_searchfields()) self.assertEqual(content_field_name, 'text') self.assertEqual(len(mapping), 15 + 2) # +2 management fields self.assertEqual(mapping, { 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'name': {'type': 'string', 'analyzer': 'snowball'}, 'is_active_exact': {'type': 'boolean'}, 'created': {'type': 'date'}, 'post_count': {'type': 'long'}, 'created_exact': {'type': 'date'}, 'sites_exact': {'index': 'not_analyzed', 'type': 'string'}, 'is_active': {'type': 'boolean'}, 'sites': {'type': 'string', 'analyzer': 'snowball'}, 'post_count_i': {'type': 'long'}, 'average_rating': {'type': 'float'}, 'text': {'type': 'string', 'analyzer': 'snowball'}, 'pub_date_exact': {'type': 'date'}, 'name_exact': {'index': 'not_analyzed', 'type': 'string'}, 'pub_date': {'type': 'date'}, 'average_rating_exact': {'type': 'float'} }) def test_verify_type(self): old_ui = 
connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() smtmmi = ElasticsearchMaintainTypeMockSearchIndex() ui.build(indexes=[smtmmi]) connections['elasticsearch']._index = ui sb = connections['elasticsearch'].get_backend() sb.update(smtmmi, self.sample_objs) self.assertEqual(sb.search('*:*')['hits'], 3) self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) connections['elasticsearch']._index = old_ui class CaptureHandler(std_logging.Handler): logs_seen = [] def emit(self, record): CaptureHandler.logs_seen.append(record) class FailedElasticsearchSearchBackendTestCase(TestCase): def setUp(self): self.sample_objs = [] for i in range(1, 4): mock = MockModel() mock.id = i mock.author = 'daniel%s' % i mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) self.sample_objs.append(mock) # Stow. # Point the backend at a URL that doesn't exist so we can watch the # sparks fly. self.old_es_url = settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = "%s/foo/" % self.old_es_url self.cap = CaptureHandler() logging.getLogger('haystack').addHandler(self.cap) config = apps.get_app_config('haystack') logging.getLogger('haystack').removeHandler(config.stream) # Setup the rest of the bits. self.old_ui = connections['elasticsearch'].get_unified_index() ui = UnifiedIndex() self.smmi = ElasticsearchMockSearchIndex() ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = ui self.sb = connections['elasticsearch'].get_backend() def tearDown(self): # Restore. 
settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL'] = self.old_es_url connections['elasticsearch']._index = self.old_ui config = apps.get_app_config('haystack') logging.getLogger('haystack').removeHandler(self.cap) logging.getLogger('haystack').addHandler(config.stream) @unittest.expectedFailure def test_all_cases(self): # Prior to the addition of the try/except bits, these would all fail miserably. self.assertEqual(len(CaptureHandler.logs_seen), 0) self.sb.update(self.smmi, self.sample_objs) self.assertEqual(len(CaptureHandler.logs_seen), 1) self.sb.remove(self.sample_objs[0]) self.assertEqual(len(CaptureHandler.logs_seen), 2) self.sb.search('search') self.assertEqual(len(CaptureHandler.logs_seen), 3) self.sb.more_like_this(self.sample_objs[0]) self.assertEqual(len(CaptureHandler.logs_seen), 4) self.sb.clear([MockModel]) self.assertEqual(len(CaptureHandler.logs_seen), 5) self.sb.clear() self.assertEqual(len(CaptureHandler.logs_seen), 6) class LiveElasticsearchSearchQueryTestCase(TestCase): fixtures = ['base_data.json'] def setUp(self): super(LiveElasticsearchSearchQueryTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = ElasticsearchMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() self.sq = connections['elasticsearch'].get_query() # Force indexing of the content. self.smmi.update(using='elasticsearch') def tearDown(self): connections['elasticsearch']._index = self.old_ui super(LiveElasticsearchSearchQueryTestCase, self).tearDown() def test_log_query(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=False): len(self.sq.get_results()) self.assertEqual(len(connections['elasticsearch'].queries), 0) with self.settings(DEBUG=True): # Redefine it to clear out the cached results. 
self.sq = connections['elasticsearch'].query(using='elasticsearch') self.sq.add_filter(SQ(name='bar')) len(self.sq.get_results()) self.assertEqual(len(connections['elasticsearch'].queries), 1) self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') # And again, for good measure. self.sq = connections['elasticsearch'].query('elasticsearch') self.sq.add_filter(SQ(name='bar')) self.sq.add_filter(SQ(text='moof')) len(self.sq.get_results()) self.assertEqual(len(connections['elasticsearch'].queries), 2) self.assertEqual(connections['elasticsearch'].queries[0]['query_string'], 'name:(bar)') self.assertEqual(connections['elasticsearch'].queries[1]['query_string'], u'(name:(bar) AND text:(moof))') lssqstc_all_loaded = None @override_settings(DEBUG=True) class LiveElasticsearchSearchQuerySetTestCase(TestCase): """Used to test actual implementation details of the SearchQuerySet.""" fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(LiveElasticsearchSearchQuerySetTestCase, self).setUp() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = ElasticsearchMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') self.rsqs = RelatedSearchQuerySet('elasticsearch') # Ugly but not constantly reindexing saves us almost 50% runtime. global lssqstc_all_loaded if lssqstc_all_loaded is None: lssqstc_all_loaded = True # Wipe it clean. clear_elasticsearch_index() # Force indexing of the content. self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. 
connections['elasticsearch']._index = self.old_ui super(LiveElasticsearchSearchQuerySetTestCase, self).tearDown() def test_load_all(self): sqs = self.sqs.order_by('pub_date').load_all() self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertTrue(len(sqs) > 0) self.assertEqual(sqs[2].object.foo, u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.') def test_iter(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() results = sorted([int(result.pk) for result in list(sqs)]) self.assertEqual(results, list(range(1, 24))) self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_slice(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) self.assertEqual(len(connections['elasticsearch'].queries), 1) reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_values_slicing(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends # The values will come back as strings because Hasytack doesn't assume PKs are integers. 
# We'll prepare this set once since we're going to query the same results in multiple ways: expected_pks = [str(i) for i in [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]] results = self.sqs.all().order_by('pub_date').values('pk') self.assertListEqual([i['pk'] for i in results[1:11]], expected_pks) results = self.sqs.all().order_by('pub_date').values_list('pk') self.assertListEqual([i[0] for i in results[1:11]], expected_pks) results = self.sqs.all().order_by('pub_date').values_list('pk', flat=True) self.assertListEqual(results[1:11], expected_pks) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_count(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.sqs.all() self.assertEqual(sqs.count(), 23) self.assertEqual(sqs.count(), 23) self.assertEqual(len(sqs), 23) self.assertEqual(sqs.count(), 23) # Should only execute one query to count the length of the result set. self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_highlight(self): reset_search_queries() results = self.sqs.filter(content='index').highlight() self.assertEqual(results[0].highlighted, [u'Indexed!\n1']) def test_highlight_options(self): reset_search_queries() results = self.sqs.filter(content='index') results = results.highlight(pre_tags=[''], post_tags=['']) self.assertEqual(results[0].highlighted, [u'Indexed!\n1']) def test_manual_iter(self): results = self.sqs.all() reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = set([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_fill_cache(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.sqs.all() self.assertEqual(len(results._result_cache), 0) 
self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_cache_is_full(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.sqs._cache_is_full(), False) results = self.sqs.all() fire_the_iterator_and_fill_cache = list(results) self.assertEqual(23, len(fire_the_iterator_and_fill_cache)) self.assertEqual(results._cache_is_full(), True) self.assertEqual(len(connections['elasticsearch'].queries), 4) def test___and__(self): sqs1 = self.sqs.filter(content='foo') sqs2 = self.sqs.filter(content='bar') sqs = sqs1 & sqs2 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) self.assertEqual(sqs.query.build_query(), u'((foo) AND (bar))') # Now for something more complex... sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) sqs4 = self.sqs.filter(content='bar') sqs = sqs3 & sqs4 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 3) self.assertEqual(sqs.query.build_query(), u'(NOT (title:(moof)) AND ((foo) OR (baz)) AND (bar))') def test___or__(self): sqs1 = self.sqs.filter(content='foo') sqs2 = self.sqs.filter(content='bar') sqs = sqs1 | sqs2 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) self.assertEqual(sqs.query.build_query(), u'((foo) OR (bar))') # Now for something more complex... 
sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) sqs4 = self.sqs.filter(content='bar').models(MockModel) sqs = sqs3 | sqs4 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) self.assertEqual(sqs.query.build_query(), u'((NOT (title:(moof)) AND ((foo) OR (baz))) OR (bar))') def test_auto_query(self): # Ensure bits in exact matches get escaped properly as well. # This will break horrifically if escaping isn't working. sqs = self.sqs.auto_query('"pants:rule"') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') self.assertEqual(sqs.query.build_query(), u'("pants\\:rule")') self.assertEqual(len(sqs), 0) def test_query__in(self): self.assertGreater(len(self.sqs), 0) sqs = self.sqs.filter(django_ct='core.mockmodel', django_id__in=[1, 2]) self.assertEqual(len(sqs), 2) def test_query__in_empty_list(self): """Confirm that an empty list avoids a Elasticsearch exception""" self.assertGreater(len(self.sqs), 0) sqs = self.sqs.filter(id__in=[]) self.assertEqual(len(sqs), 0) # Regressions def test_regression_proper_start_offsets(self): sqs = self.sqs.filter(text='index') self.assertNotEqual(sqs.count(), 0) id_counts = {} for item in sqs: if item.id in id_counts: id_counts[item.id] += 1 else: id_counts[item.id] = 1 for key, value in id_counts.items(): if value > 1: self.fail("Result with id '%s' seen more than once in the results." % key) def test_regression_raw_search_breaks_slicing(self): sqs = self.sqs.raw_search('text:index') page_1 = [result.pk for result in sqs[0:10]] page_2 = [result.pk for result in sqs[10:20]] for pk in page_2: if pk in page_1: self.fail("Result with id '%s' seen more than once in the results." 
% pk) # RelatedSearchQuerySet Tests def test_related_load_all(self): sqs = self.rsqs.order_by('pub_date').load_all() self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertTrue(len(sqs) > 0) self.assertEqual(sqs[2].object.foo, u'In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with.') def test_related_load_all_queryset(self): sqs = self.rsqs.load_all().order_by('pub_date') self.assertEqual(len(sqs._load_all_querysets), 0) sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=1)) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs._load_all_querysets), 1) self.assertEqual(sorted([obj.object.id for obj in sqs]), list(range(2, 24))) sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10)) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs._load_all_querysets), 1) self.assertEqual(set([obj.object.id for obj in sqs]), set([12, 17, 11, 16, 23, 15, 22, 14, 19, 21, 13, 18, 20])) self.assertEqual(set([obj.object.id for obj in sqs[10:20]]), set([21, 22, 23])) def test_related_iter(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) sqs = self.rsqs.all() results = set([int(result.pk) for result in list(sqs)]) self.assertEqual(results, set([2, 7, 12, 17, 1, 6, 11, 16, 23, 5, 10, 15, 22, 4, 9, 14, 19, 21, 3, 8, 13, 18, 20])) self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_related_slice(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual([int(result.pk) for 
result in results[1:11]], [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]) self.assertEqual(len(connections['elasticsearch'].queries), 1) reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(int(results[21].pk), 22) self.assertEqual(len(connections['elasticsearch'].queries), 1) reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all().order_by('pub_date') self.assertEqual(set([int(result.pk) for result in results[20:30]]), set([21, 22, 23])) self.assertEqual(len(connections['elasticsearch'].queries), 1) def test_related_manual_iter(self): results = self.rsqs.all() reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = sorted([int(result.pk) for result in results._manual_iter()]) self.assertEqual(results, list(range(1, 24))) self.assertEqual(len(connections['elasticsearch'].queries), 3) def test_related_fill_cache(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) results = self.rsqs.all() self.assertEqual(len(results._result_cache), 0) self.assertEqual(len(connections['elasticsearch'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) self.assertEqual(len(connections['elasticsearch'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) self.assertEqual(len(connections['elasticsearch'].queries), 2) def test_related_cache_is_full(self): reset_search_queries() self.assertEqual(len(connections['elasticsearch'].queries), 0) self.assertEqual(self.rsqs._cache_is_full(), False) results = self.rsqs.all() fire_the_iterator_and_fill_cache = list(results) self.assertEqual(23, len(fire_the_iterator_and_fill_cache)) self.assertEqual(results._cache_is_full(), True) 
self.assertEqual(len(connections['elasticsearch'].queries), 4) def test_quotes_regression(self): sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E") # Should not have empty terms. self.assertEqual(sqs.query.build_query(), u"(44\xb048'40''N 20\xb028'32''E)") # Should not cause Elasticsearch to 500. self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('blazing') self.assertEqual(sqs.query.build_query(), u'(blazing)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('blazing saddles') self.assertEqual(sqs.query.build_query(), u'(blazing saddles)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles') self.assertEqual(sqs.query.build_query(), u'(\\"blazing saddles)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles"') self.assertEqual(sqs.query.build_query(), u'("blazing saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing saddles"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'saddles"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'\'saddles"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \')') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \'\\")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles" mel') self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles" mel brooks') self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel brooks)') 
self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing saddles" brooks') self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" brooks)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing saddles" "brooks') self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" \\"brooks)') self.assertEqual(sqs.count(), 0) def test_query_generation(self): sqs = self.sqs.filter(SQ(content=AutoQuery("hello world")) | SQ(title=AutoQuery("hello world"))) self.assertEqual(sqs.query.build_query(), u"((hello world) OR title:(hello world))") def test_result_class(self): # Assert that we're defaulting to ``SearchResult``. sqs = self.sqs.all() self.assertTrue(isinstance(sqs[0], SearchResult)) # Custom class. sqs = self.sqs.result_class(MockSearchResult).all() self.assertTrue(isinstance(sqs[0], MockSearchResult)) # Reset to default. sqs = self.sqs.result_class(None).all() self.assertTrue(isinstance(sqs[0], SearchResult)) @override_settings(DEBUG=True) class LiveElasticsearchSpellingTestCase(TestCase): """Used to test actual implementation details of the SearchQuerySet.""" fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(LiveElasticsearchSpellingTestCase, self).setUp() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = ElasticsearchMockSpellingIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. self.sb = connections['elasticsearch'].get_backend() self.sb.setup() self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. 
connections['elasticsearch']._index = self.old_ui super(LiveElasticsearchSpellingTestCase, self).tearDown() def test_spelling(self): self.assertEqual(self.sqs.auto_query('structurd').spelling_suggestion(), 'structured') self.assertEqual(self.sqs.spelling_suggestion('structurd'), 'structured') self.assertEqual(self.sqs.auto_query('srchindex instanc').spelling_suggestion(), 'searchindex instance') self.assertEqual(self.sqs.spelling_suggestion('srchindex instanc'), 'searchindex instance') sqs = self.sqs.auto_query('something completely different').set_spelling_query('structurd') self.assertEqual(sqs.spelling_suggestion(), 'structured') class LiveElasticsearchMoreLikeThisTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(LiveElasticsearchMoreLikeThisTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = ElasticsearchMockModelSearchIndex() self.sammi = ElasticsearchAnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') self.smmi.update(using='elasticsearch') self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. 
connections['elasticsearch']._index = self.old_ui super(LiveElasticsearchMoreLikeThisTestCase, self).tearDown() def test_more_like_this(self): mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1)) self.assertEqual(mlt.count(), 4) self.assertEqual(set([result.pk for result in mlt]), set([u'2', u'6', u'16', u'23'])) self.assertEqual(len([result.pk for result in mlt]), 4) alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=2)) self.assertEqual(alt_mlt.count(), 6) self.assertEqual(set([result.pk for result in alt_mlt]), set([u'2', u'6', u'16', u'23', u'1', u'11'])) self.assertEqual(len([result.pk for result in alt_mlt]), 6) alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1)) self.assertEqual(alt_mlt_with_models.count(), 4) self.assertEqual(set([result.pk for result in alt_mlt_with_models]), set([u'2', u'6', u'16', u'23'])) self.assertEqual(len([result.pk for result in alt_mlt_with_models]), 4) if hasattr(MockModel.objects, 'defer'): # Make sure MLT works with deferred bits. mi = MockModel.objects.defer('foo').get(pk=1) deferred = self.sqs.models(MockModel).more_like_this(mi) self.assertEqual(deferred.count(), 4) self.assertEqual(set([result.pk for result in deferred]), set([u'2', u'6', u'16', u'23'])) self.assertEqual(len([result.pk for result in deferred]), 4) # Ensure that swapping the ``result_class`` works. self.assertTrue(isinstance(self.sqs.result_class(MockSearchResult).more_like_this(MockModel.objects.get(pk=1))[0], MockSearchResult)) class LiveElasticsearchAutocompleteTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(LiveElasticsearchAutocompleteTestCase, self).setUp() # Stow. 
self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = ElasticsearchAutocompleteMockModelSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') # Wipe it clean. clear_elasticsearch_index() # Reboot the schema. self.sb = connections['elasticsearch'].get_backend() self.sb.setup() self.smmi.update(using='elasticsearch') def tearDown(self): # Restore. connections['elasticsearch']._index = self.old_ui super(LiveElasticsearchAutocompleteTestCase, self).tearDown() def test_build_schema(self): self.sb = connections['elasticsearch'].get_backend() content_name, mapping = self.sb.build_schema(self.ui.all_searchfields()) self.assertEqual(mapping, { 'django_id': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'django_ct': {'index': 'not_analyzed', 'type': 'string', 'include_in_all': False}, 'name_auto': { 'type': 'string', 'analyzer': 'edgengram_analyzer', }, 'text': { 'type': 'string', 'analyzer': 'snowball', }, 'pub_date': { 'type': 'date' }, 'name': { 'type': 'string', 'analyzer': 'snowball', }, 'text_auto': { 'type': 'string', 'analyzer': 'edgengram_analyzer', } }) def test_autocomplete(self): autocomplete = self.sqs.autocomplete(text_auto='mod') self.assertEqual(autocomplete.count(), 16) self.assertEqual(set([result.pk for result in autocomplete]), set(['1', '12', '6', '14', '7', '4', '23', '17', '13', '18', '20', '22', '19', '15', '10', '2'])) self.assertTrue('mod' in autocomplete[0].text.lower()) self.assertTrue('mod' in autocomplete[1].text.lower()) self.assertTrue('mod' in autocomplete[2].text.lower()) self.assertTrue('mod' in autocomplete[3].text.lower()) self.assertTrue('mod' in autocomplete[4].text.lower()) self.assertEqual(len([result.pk for result in autocomplete]), 16) # Test multiple words. 
autocomplete_2 = self.sqs.autocomplete(text_auto='your mod') self.assertEqual(autocomplete_2.count(), 13) self.assertEqual(set([result.pk for result in autocomplete_2]), set(['1', '6', '2', '14', '12', '13', '10', '19', '4', '20', '23', '22', '15'])) self.assertTrue('your' in autocomplete_2[0].text.lower()) self.assertTrue('mod' in autocomplete_2[0].text.lower()) self.assertTrue('your' in autocomplete_2[1].text.lower()) self.assertTrue('mod' in autocomplete_2[1].text.lower()) self.assertTrue('your' in autocomplete_2[2].text.lower()) self.assertEqual(len([result.pk for result in autocomplete_2]), 13) # Test multiple fields. autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan') self.assertEqual(autocomplete_3.count(), 4) self.assertEqual(set([result.pk for result in autocomplete_3]), set(['12', '1', '22', '14'])) self.assertEqual(len([result.pk for result in autocomplete_3]), 4) # Test numbers in phrases autocomplete_4 = self.sqs.autocomplete(text_auto='Jen 867') self.assertEqual(autocomplete_4.count(), 1) self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) # Test numbers alone autocomplete_4 = self.sqs.autocomplete(text_auto='867') self.assertEqual(autocomplete_4.count(), 1) self.assertEqual(set([result.pk for result in autocomplete_4]), set(['20'])) class LiveElasticsearchRoundTripTestCase(TestCase): def setUp(self): super(LiveElasticsearchRoundTripTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.srtsi = ElasticsearchRoundTripSearchIndex() self.ui.build(indexes=[self.srtsi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() self.sqs = SearchQuerySet('elasticsearch') # Fake indexing. mock = MockModel() mock.id = 1 self.sb.update(self.srtsi, [mock]) def tearDown(self): # Restore. 
connections['elasticsearch']._index = self.old_ui super(LiveElasticsearchRoundTripTestCase, self).tearDown() def test_round_trip(self): results = self.sqs.filter(id='core.mockmodel.1') # Sanity check. self.assertEqual(results.count(), 1) # Check the individual fields. result = results[0] self.assertEqual(result.id, 'core.mockmodel.1') self.assertEqual(result.text, 'This is some example text.') self.assertEqual(result.name, 'Mister Pants') self.assertEqual(result.is_active, True) self.assertEqual(result.post_count, 25) self.assertEqual(result.average_rating, 3.6) self.assertEqual(result.price, u'24.99') self.assertEqual(result.pub_date, datetime.date(2009, 11, 21)) self.assertEqual(result.created, datetime.datetime(2009, 11, 21, 21, 31, 00)) self.assertEqual(result.tags, ['staff', 'outdoor', 'activist', 'scientist']) self.assertEqual(result.sites, [3, 5, 1]) @unittest.skipUnless(test_pickling, 'Skipping pickling tests') class LiveElasticsearchPickleTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(LiveElasticsearchPickleTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = ElasticsearchMockModelSearchIndex() self.sammi = ElasticsearchAnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) connections['elasticsearch']._index = self.ui self.sqs = SearchQuerySet('elasticsearch') self.smmi.update(using='elasticsearch') self.sammi.update(using='elasticsearch') def tearDown(self): # Restore. connections['elasticsearch']._index = self.old_ui super(LiveElasticsearchPickleTestCase, self).tearDown() def test_pickling(self): results = self.sqs.all() for res in results: # Make sure the cache is full. 
pass in_a_pickle = pickle.dumps(results) like_a_cuke = pickle.loads(in_a_pickle) self.assertEqual(len(like_a_cuke), len(results)) self.assertEqual(like_a_cuke[0].id, results[0].id) class ElasticsearchBoostBackendTestCase(TestCase): def setUp(self): super(ElasticsearchBoostBackendTestCase, self).setUp() # Wipe it clean. self.raw_es = elasticsearch.Elasticsearch(settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = ElasticsearchBoostMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() self.sample_objs = [] for i in range(1, 5): mock = AFourthMockModel() mock.id = i if i % 2: mock.author = 'daniel' mock.editor = 'david' else: mock.author = 'david' mock.editor = 'daniel' mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) self.sample_objs.append(mock) def tearDown(self): connections['elasticsearch']._index = self.old_ui super(ElasticsearchBoostBackendTestCase, self).tearDown() def raw_search(self, query): return self.raw_es.search(q='*:*', index=settings.HAYSTACK_CONNECTIONS['elasticsearch']['INDEX_NAME']) def test_boost(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_search('*:*')['hits']['total'], 4) results = SearchQuerySet(using='elasticsearch').filter(SQ(author='daniel') | SQ(editor='daniel')) self.assertEqual(set([result.id for result in results]), set([ 'core.afourthmockmodel.4', 'core.afourthmockmodel.3', 'core.afourthmockmodel.1', 'core.afourthmockmodel.2' ])) def test__to_python(self): self.assertEqual(self.sb._to_python('abc'), 'abc') self.assertEqual(self.sb._to_python('1'), 1) self.assertEqual(self.sb._to_python('2653'), 2653) self.assertEqual(self.sb._to_python('25.5'), 25.5) self.assertEqual(self.sb._to_python('[1, 2, 3]'), [1, 2, 3]) 
self.assertEqual(self.sb._to_python('{"a": 1, "b": 2, "c": 3}'), {'a': 1, 'c': 3, 'b': 2}) self.assertEqual(self.sb._to_python('2009-05-09T16:14:00'), datetime.datetime(2009, 5, 9, 16, 14)) self.assertEqual(self.sb._to_python('2009-05-09T00:00:00'), datetime.datetime(2009, 5, 9, 0, 0)) self.assertEqual(self.sb._to_python(None), None) class RecreateIndexTestCase(TestCase): def setUp(self): self.raw_es = elasticsearch.Elasticsearch( settings.HAYSTACK_CONNECTIONS['elasticsearch']['URL']) def test_recreate_index(self): clear_elasticsearch_index() sb = connections['elasticsearch'].get_backend() sb.silently_fail = True sb.setup() original_mapping = self.raw_es.indices.get_mapping(index=sb.index_name) sb.clear() sb.setup() try: updated_mapping = self.raw_es.indices.get_mapping(sb.index_name) except elasticsearch.NotFoundError: self.fail("There is no mapping after recreating the index") self.assertEqual(original_mapping, updated_mapping, "Mapping after recreating the index differs from the original one") class ElasticsearchFacetingTestCase(TestCase): def setUp(self): super(ElasticsearchFacetingTestCase, self).setUp() # Wipe it clean. clear_elasticsearch_index() # Stow. self.old_ui = connections['elasticsearch'].get_unified_index() self.ui = UnifiedIndex() self.smmi = ElasticsearchFacetingMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['elasticsearch']._index = self.ui self.sb = connections['elasticsearch'].get_backend() # Force the backend to rebuild the mapping each time. 
self.sb.existing_mapping = {} self.sb.setup() self.sample_objs = [] for i in range(1, 10): mock = AFourthMockModel() mock.id = i if i > 5: mock.editor = 'George Taylor' else: mock.editor = 'Perry White' if i % 2: mock.author = 'Daniel Lindsley' else: mock.author = 'Dan Watson' mock.pub_date = datetime.date(2013, 9, (i % 4) + 1) self.sample_objs.append(mock) def tearDown(self): connections['elasticsearch']._index = self.old_ui super(ElasticsearchFacetingTestCase, self).tearDown() def test_facet(self): self.sb.update(self.smmi, self.sample_objs) counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 5), ('Dan Watson', 4), ]) self.assertEqual(counts['fields']['editor'], [ ('Perry White', 5), ('George Taylor', 4), ]) counts = SearchQuerySet('elasticsearch').filter(content='white').facet('facet_field', order='reverse_count').facet_counts() self.assertEqual(counts['fields']['facet_field'], [ ('Dan Watson', 2), ('Daniel Lindsley', 3), ]) def test_multiple_narrow(self): self.sb.update(self.smmi, self.sample_objs) counts = SearchQuerySet('elasticsearch').narrow('editor_exact:"Perry White"').narrow('author_exact:"Daniel Lindsley"').facet('author').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), ]) def test_narrow(self): self.sb.update(self.smmi, self.sample_objs) counts = SearchQuerySet('elasticsearch').facet('author').facet('editor').narrow('editor_exact:"Perry White"').facet_counts() self.assertEqual(counts['fields']['author'], [ ('Daniel Lindsley', 3), ('Dan Watson', 2), ]) self.assertEqual(counts['fields']['editor'], [ ('Perry White', 5), ]) def test_date_facet(self): self.sb.update(self.smmi, self.sample_objs) start = datetime.date(2013, 9, 1) end = datetime.date(2013, 9, 30) # Facet by day counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, gap_by='day').facet_counts() 
self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 2), (datetime.datetime(2013, 9, 2), 3), (datetime.datetime(2013, 9, 3), 2), (datetime.datetime(2013, 9, 4), 2), ]) # By month counts = SearchQuerySet('elasticsearch').date_facet('pub_date', start_date=start, end_date=end, gap_by='month').facet_counts() self.assertEqual(counts['dates']['pub_date'], [ (datetime.datetime(2013, 9, 1), 9), ]) django-haystack-2.8.0/test_haystack/elasticsearch_tests/test_elasticsearch_query.py000066400000000000000000000245161325051407000311470ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime import elasticsearch from django.test import TestCase from haystack import connections from haystack.inputs import Exact from haystack.models import SearchResult from haystack.query import SearchQuerySet, SQ from haystack.utils.geo import D, Point from ..core.models import AnotherMockModel, MockModel class ElasticsearchSearchQueryTestCase(TestCase): fixtures = ['base_data'] def setUp(self): super(ElasticsearchSearchQueryTestCase, self).setUp() self.sq = connections['elasticsearch'].get_query() def test_build_query_all(self): self.assertEqual(self.sq.build_query(), '*:*') def test_build_query_single_word(self): self.sq.add_filter(SQ(content='hello')) self.assertEqual(self.sq.build_query(), '(hello)') def test_build_query_boolean(self): self.sq.add_filter(SQ(content=True)) self.assertEqual(self.sq.build_query(), '(True)') def test_regression_slash_search(self): self.sq.add_filter(SQ(content='hello/')) self.assertEqual(self.sq.build_query(), '(hello\\/)') def test_build_query_datetime(self): self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28))) self.assertEqual(self.sq.build_query(), '(2009-05-08T11:28:00)') def test_build_query_multiple_words_and(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_filter(SQ(content='world')) 
self.assertEqual(self.sq.build_query(), '((hello) AND (world))') def test_build_query_multiple_words_not(self): self.sq.add_filter(~SQ(content='hello')) self.sq.add_filter(~SQ(content='world')) self.assertEqual(self.sq.build_query(), '(NOT ((hello)) AND NOT ((world)))') def test_build_query_multiple_words_or(self): self.sq.add_filter(~SQ(content='hello')) self.sq.add_filter(SQ(content='hello'), use_or=True) self.assertEqual(self.sq.build_query(), '(NOT ((hello)) OR (hello))') def test_build_query_multiple_words_mixed(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(content='hello'), use_or=True) self.sq.add_filter(~SQ(content='world')) self.assertEqual(self.sq.build_query(), u'(((why) OR (hello)) AND NOT ((world)))') def test_build_query_phrase(self): self.sq.add_filter(SQ(content='hello world')) self.assertEqual(self.sq.build_query(), '(hello AND world)') self.sq.add_filter(SQ(content__exact='hello world')) self.assertEqual(self.sq.build_query(), u'((hello AND world) AND ("hello world"))') def test_build_query_boost(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_boost('world', 5) self.assertEqual(self.sq.build_query(), "(hello) world^5") def test_build_query_multiple_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__lte=Exact('2009-02-10 01:59:00'))) self.sq.add_filter(SQ(author__gt='daniel')) self.sq.add_filter(SQ(created__lt=Exact('2009-02-12 12:13:00'))) self.sq.add_filter(SQ(title__gte='B')) self.sq.add_filter(SQ(id__in=[1, 2, 3])) self.sq.add_filter(SQ(rating__range=[3, 5])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:([* TO "2009-02-10 01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12 12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') def test_build_query_multiple_filter_types_with_datetimes(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 
0))) self.sq.add_filter(SQ(author__gt='daniel')) self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0))) self.sq.add_filter(SQ(title__gte='B')) self.sq.add_filter(SQ(id__in=[1, 2, 3])) self.sq.add_filter(SQ(rating__range=[3, 5])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:([* TO "2009-02-10T01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12T12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') def test_build_query_in_filter_multiple_words(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"])) self.assertEqual(self.sq.build_query(), u'((why) AND title:("A Famous Paper" OR "An Infamous Article"))') def test_build_query_in_filter_datetime(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:("2009-07-06T01:56:21"))') def test_build_query_in_with_set(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=set(["A Famous Paper", "An Infamous Article"]))) self.assertTrue('((why) AND title:(' in self.sq.build_query()) self.assertTrue('"A Famous Paper"' in self.sq.build_query()) self.assertTrue('"An Infamous Article"' in self.sq.build_query()) def test_build_query_wildcard_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__startswith='haystack')) self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack*))') def test_build_query_fuzzy_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__fuzzy='haystack')) self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack~))') def test_build_query_with_contains(self): self.sq.add_filter(SQ(content='circular')) self.sq.add_filter(SQ(title__contains='haystack')) self.assertEqual(self.sq.build_query(), u'((circular) AND 
title:(*haystack*))') def test_build_query_with_endswith(self): self.sq.add_filter(SQ(content='circular')) self.sq.add_filter(SQ(title__endswith='haystack')) self.assertEqual(self.sq.build_query(), u'((circular) AND title:(*haystack))') def test_clean(self): self.assertEqual(self.sq.clean('hello world'), 'hello world') self.assertEqual(self.sq.clean('hello AND world'), 'hello and world') self.assertEqual(self.sq.clean('hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \ / world'), 'hello and or not to \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ \\/ world') self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), 'so please NOTe i am in a bAND and bORed') def test_build_query_with_models(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_model(MockModel) self.assertEqual(self.sq.build_query(), '(hello)') self.sq.add_model(AnotherMockModel) self.assertEqual(self.sq.build_query(), u'(hello)') def test_set_result_class(self): # Assert that we're defaulting to ``SearchResult``. self.assertTrue(issubclass(self.sq.result_class, SearchResult)) # Custom class. class IttyBittyResult(object): pass self.sq.set_result_class(IttyBittyResult) self.assertTrue(issubclass(self.sq.result_class, IttyBittyResult)) # Reset to default. 
self.sq.set_result_class(None) self.assertTrue(issubclass(self.sq.result_class, SearchResult)) def test_in_filter_values_list(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=MockModel.objects.values_list('id', flat=True))) self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))') def test_narrow_sq(self): sqs = SearchQuerySet(using='elasticsearch').narrow(SQ(foo='moof')) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.narrow_queries), 1) self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') def test_query__in(self): sqs = SearchQuerySet(using='elasticsearch').filter(id__in=[1, 2, 3]) self.assertEqual(sqs.query.build_query(), u'id:("1" OR "2" OR "3")') def test_query__in_empty_list(self): """Confirm that an empty list avoids a Elasticsearch exception""" sqs = SearchQuerySet(using='elasticsearch').filter(id__in=[]) self.assertEqual(sqs.query.build_query(), u'id:(!*:*)') class ElasticsearchSearchQuerySpatialBeforeReleaseTestCase(TestCase): def setUp(self): super(ElasticsearchSearchQuerySpatialBeforeReleaseTestCase, self).setUp() self.backend = connections['elasticsearch'].get_backend() self._elasticsearch_version = elasticsearch.VERSION elasticsearch.VERSION = (0,9,9) def tearDown(self): elasticsearch.VERSION = self._elasticsearch_version def test_build_query_with_dwithin_range(self): """ Test build_search_kwargs with dwithin range for Elasticsearch versions < 1.0.0 """ search_kwargs = self.backend.build_search_kwargs('where', dwithin={ 'field': "location_field", 'point': Point(1.2345678, 2.3456789), 'distance': D(m=500) }) self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], {'distance': 0.5, 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) class ElasticsearchSearchQuerySpatialAfterReleaseTestCase(TestCase): def setUp(self): super(ElasticsearchSearchQuerySpatialAfterReleaseTestCase, self).setUp() self.backend = 
connections['elasticsearch'].get_backend() self._elasticsearch_version = elasticsearch.VERSION elasticsearch.VERSION = (1,0,0) def tearDown(self): elasticsearch.VERSION = self._elasticsearch_version def test_build_query_with_dwithin_range(self): """ Test build_search_kwargs with dwithin range for Elasticsearch versions >= 1.0.0 """ search_kwargs = self.backend.build_search_kwargs('where', dwithin={ 'field': "location_field", 'point': Point(1.2345678, 2.3456789), 'distance': D(m=500) }) self.assertEqual(search_kwargs['query']['filtered']['filter']['bool']['must'][1]['geo_distance'], {'distance': "0.500000km", 'location_field': {'lat': 2.3456789, 'lon': 1.2345678}}) django-haystack-2.8.0/test_haystack/elasticsearch_tests/test_inputs.py000066400000000000000000000070371325051407000264310ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from haystack import connections, inputs class ElasticsearchInputTestCase(TestCase): def setUp(self): super(ElasticsearchInputTestCase, self).setUp() self.query_obj = connections['elasticsearch'].get_query() def test_raw_init(self): raw = inputs.Raw('hello OR there, :you') self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {}) self.assertEqual(raw.post_process, False) raw = inputs.Raw('hello OR there, :you', test='really') self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {'test': 'really'}) self.assertEqual(raw.post_process, False) def test_raw_prepare(self): raw = inputs.Raw('hello OR there, :you') self.assertEqual(raw.prepare(self.query_obj), 'hello OR there, :you') def test_clean_init(self): clean = inputs.Clean('hello OR there, :you') self.assertEqual(clean.query_string, 'hello OR there, :you') self.assertEqual(clean.post_process, True) def test_clean_prepare(self): clean = inputs.Clean('hello OR there, :you') 
self.assertEqual(clean.prepare(self.query_obj), 'hello or there, \\:you') def test_exact_init(self): exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.query_string, 'hello OR there, :you') self.assertEqual(exact.post_process, True) def test_exact_prepare(self): exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"') exact = inputs.Exact('hello OR there, :you', clean=True) self.assertEqual(exact.prepare(self.query_obj), u'"hello or there, \\:you"') def test_not_init(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.query_string, 'hello OR there, :you') self.assertEqual(not_it.post_process, True) def test_not_prepare(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.prepare(self.query_obj), u'NOT (hello or there, \\:you)') def test_autoquery_init(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.query_string, 'panic -don\'t "froody dude"') self.assertEqual(autoquery.post_process, False) def test_autoquery_prepare(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.prepare(self.query_obj), u'panic NOT don\'t "froody dude"') def test_altparser_init(self): altparser = inputs.AltParser('dismax') self.assertEqual(altparser.parser_name, 'dismax') self.assertEqual(altparser.query_string, '') self.assertEqual(altparser.kwargs, {}) self.assertEqual(altparser.post_process, False) altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) self.assertEqual(altparser.parser_name, 'dismax') self.assertEqual(altparser.query_string, 'douglas adams') self.assertEqual(altparser.kwargs, {'mm': 1, 'qf': 'author'}) self.assertEqual(altparser.post_process, False) def test_altparser_prepare(self): altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) self.assertEqual(altparser.prepare(self.query_obj), u"""{!dismax mm=1 qf=author v='douglas 
adams'}""") django-haystack-2.8.0/test_haystack/mocks.py000066400000000000000000000124371325051407000211300ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.apps import apps from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query from haystack.models import SearchResult from haystack.routers import BaseRouter from haystack.utils import get_identifier class MockMasterSlaveRouter(BaseRouter): def for_read(self, **hints): return 'slave' def for_write(self, **hints): return 'master' class MockPassthroughRouter(BaseRouter): def for_read(self, **hints): if hints.get('pass_through') is False: return 'pass' return None def for_write(self, **hints): if hints.get('pass_through') is False: return 'pass' return None class MockMultiRouter(BaseRouter): def for_write(self, **hints): return ['multi1', 'multi2'] class MockSearchResult(SearchResult): def __init__(self, app_label, model_name, pk, score, **kwargs): super(MockSearchResult, self).__init__(app_label, model_name, pk, score, **kwargs) self._model = apps.get_model('core', model_name) MOCK_SEARCH_RESULTS = [MockSearchResult('core', 'MockModel', i, 1 - (i / 100.0)) for i in range(1, 100)] MOCK_INDEX_DATA = {} class MockSearchBackend(BaseSearchBackend): model_name = 'mockmodel' def update(self, index, iterable, commit=True): global MOCK_INDEX_DATA for obj in iterable: doc = index.full_prepare(obj) MOCK_INDEX_DATA[doc['id']] = doc def remove(self, obj, commit=True): global MOCK_INDEX_DATA if commit: del(MOCK_INDEX_DATA[get_identifier(obj)]) def clear(self, models=None, commit=True): global MOCK_INDEX_DATA MOCK_INDEX_DATA = {} @log_query def search(self, query_string, **kwargs): from haystack import connections global MOCK_INDEX_DATA results = [] hits = len(MOCK_INDEX_DATA) indexed_models = connections['default'].get_unified_index().get_indexed_models() def junk_sort(key): app, model, pk = key.split('.') if 
pk.isdigit(): return int(pk) else: return ord(pk[0]) sliced = sorted(MOCK_INDEX_DATA, key=junk_sort) for i, result in enumerate(sliced): app_label, model_name, pk = result.split('.') model = apps.get_model(app_label, model_name) if model: if model in indexed_models: results.append(MockSearchResult(app_label, model_name, pk, 1 - (i / 100.0))) else: hits -= 1 else: hits -= 1 return { 'results': results[kwargs.get('start_offset'):kwargs.get('end_offset')], 'hits': hits, } def more_like_this(self, model_instance, additional_query_string=None, result_class=None): return self.search(query_string='*') class CharPKMockSearchBackend(MockSearchBackend): model_name = 'charpkmockmodel' mock_search_results = [MockSearchResult('core', 'CharPKMockModel', 'sometext', 0.5), MockSearchResult('core', 'CharPKMockModel', '1234', 0.3)] class UUIDMockSearchBackend(MockSearchBackend): model_name = 'uuidmockmodel' mock_search_results = [MockSearchResult('core', 'UUIDMockModel', '53554c58-7051-4350-bcc9-dad75eb248a9', 0.5), MockSearchResult('core', 'UUIDMockModel', '77554c58-7051-4350-bcc9-dad75eb24888', 0.5)] class ReadQuerySetMockSearchBackend(MockSearchBackend): model_name = 'afifthmockmodel' mock_search_results = [MockSearchResult('core', 'afifthmockmodel', 1, 2), MockSearchResult('core', 'afifthmockmodel', 2, 2)] class MixedMockSearchBackend(MockSearchBackend): @log_query def search(self, query_string, **kwargs): if kwargs.get('end_offset') and kwargs['end_offset'] > 30: kwargs['end_offset'] = 30 result_info = super(MixedMockSearchBackend, self).search(query_string, **kwargs) result_info['hits'] = 30 # Remove search results from other models. 
temp_results = [] for result in result_info['results']: if not int(result.pk) in (9, 13, 14): # MockSearchResult('core', 'AnotherMockModel', 9, .1) # MockSearchResult('core', 'AnotherMockModel', 13, .1) # MockSearchResult('core', 'NonexistentMockModel', 14, .1) temp_results.append(result) result_info['results'] = temp_results return result_info class MockSearchQuery(BaseSearchQuery): def build_query(self): return '' def clean(self, query_fragment): return query_fragment # def run_mlt(self): # # To simulate the chunking behavior of a regular search, return a slice # # of our results using start/end offset. # final_query = self.build_query() # results = self.backend.more_like_this(self._mlt_instance, final_query) # import pdb; pdb.set_trace() # self._results = results['results'][self.start_offset:self.end_offset] # self._hit_count = results['hits'] class MockEngine(BaseEngine): backend = MockSearchBackend query = MockSearchQuery django-haystack-2.8.0/test_haystack/multipleindex/000077500000000000000000000000001325051407000223165ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/multipleindex/__init__.py000066400000000000000000000011611325051407000244260ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import haystack from haystack.signals import RealtimeSignalProcessor from django.apps import apps from ..utils import check_solr _old_sp = None def setup(): check_solr() global _old_sp config = apps.get_app_config('haystack') _old_sp = config.signal_processor config.signal_processor = RealtimeSignalProcessor(haystack.connections, haystack.connection_router) def teardown(): config = apps.get_app_config('haystack') config.signal_processor.teardown() config.signal_processor = _old_sp django-haystack-2.8.0/test_haystack/multipleindex/models.py000066400000000000000000000007011325051407000241510ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, 
division, print_function, unicode_literals from django.db import models class Foo(models.Model): title = models.CharField(max_length=255) body = models.TextField() def __unicode__(self): return self.title class Bar(models.Model): author = models.CharField(max_length=255) content = models.TextField() def __unicode__(self): return self.author django-haystack-2.8.0/test_haystack/multipleindex/routers.py000066400000000000000000000005101325051407000243670ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from haystack.routers import BaseRouter class MultipleIndexRouter(BaseRouter): def for_write(self, instance=None, **hints): if instance and instance._meta.app_label == 'multipleindex': return 'solr' django-haystack-2.8.0/test_haystack/multipleindex/search_indexes.py000066400000000000000000000016361325051407000256620ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from haystack import indexes from haystack.indexes import Indexable, SearchIndex from .models import Bar, Foo # To test additional ignores... class BaseIndex(indexes.SearchIndex): text = indexes.CharField(document=True, model_attr='body') def get_model(self): return Foo class FooIndex(BaseIndex, indexes.Indexable): def index_queryset(self, using=None): qs = super(FooIndex, self).index_queryset(using=using) if using == "filtered_whoosh": qs = qs.filter(body__contains="1") return qs # Import the old way & make sure things don't explode. 
class BarIndex(SearchIndex, Indexable): text = indexes.CharField(document=True) def get_model(self): return Bar def prepare_text(self, obj): return u"%s\n%s" % (obj.author, obj.content) django-haystack-2.8.0/test_haystack/multipleindex/tests.py000066400000000000000000000301401325051407000240300ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.db import models from haystack import connections from haystack.exceptions import NotHandled from haystack.query import SearchQuerySet from haystack.signals import BaseSignalProcessor from ..whoosh_tests.testcases import WhooshTestCase from .models import Bar, Foo from .search_indexes import BarIndex, FooIndex class MultipleIndexTestCase(WhooshTestCase): def setUp(self): super(MultipleIndexTestCase, self).setUp() self.ui = connections['solr'].get_unified_index() self.fi = self.ui.get_index(Foo) self.bi = self.ui.get_index(Bar) self.solr_backend = connections['solr'].get_backend() self.whoosh_backend = connections['whoosh'].get_backend() self.filtered_whoosh_backend = connections['filtered_whoosh'].get_backend() Foo.objects.bulk_create([ Foo(title='Haystack test', body='foo 1'), Foo(title='Another Haystack test', body='foo 2') ]) Bar.objects.bulk_create([ Bar(author='Haystack test', content='bar 1'), Bar(author='Another Haystack test', content='bar 2'), Bar(author='Yet another Haystack test', content='bar 3'), ]) self.fi.reindex(using='solr') self.fi.reindex(using='whoosh') self.bi.reindex(using='solr') def tearDown(self): self.fi.clear(using='solr') self.bi.clear(using='solr') super(MultipleIndexTestCase, self).tearDown() def test_index_update_object_using(self): results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 2) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 2) foo_3 = Foo.objects.create( title='Whee another Haystack test', body='foo 3', ) self.fi.update_object(foo_3, 
using='solr') results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 3) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 2) self.fi.update_object(foo_3, using='whoosh') results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 3) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 3) def test_index_remove_object_using(self): results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 2) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 2) foo_1 = Foo.objects.get(pk=1) self.fi.remove_object(foo_1, using='solr') results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 1) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 2) self.fi.remove_object(foo_1, using='whoosh') results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 1) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 1) def test_index_clear_using(self): results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 2) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 2) self.fi.clear(using='solr') results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 0) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 2) self.fi.clear(using='whoosh') results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 0) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 0) def test_index_update_using(self): self.fi.clear(using='solr') self.fi.clear(using='whoosh') self.bi.clear(using='solr') self.bi.clear(using='whoosh') results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 0) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 0) self.fi.update(using='solr') results = self.solr_backend.search('foo') 
self.assertEqual(results['hits'], 2) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 0) self.fi.update(using='whoosh') results = self.solr_backend.search('foo') self.assertEqual(results['hits'], 2) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 2) def test_searchqueryset_using(self): # Using the default. sqs = SearchQuerySet('solr') self.assertEqual(sqs.count(), 5) self.assertEqual(sqs.models(Foo).count(), 2) self.assertEqual(sqs.models(Bar).count(), 3) self.assertEqual(sqs.using('solr').count(), 5) self.assertEqual(sqs.using('solr').models(Foo).count(), 2) self.assertEqual(sqs.using('solr').models(Bar).count(), 3) self.assertEqual(sqs.using('whoosh').count(), 2) self.assertEqual(sqs.using('whoosh').models(Foo).count(), 2) self.assertEqual(sqs.using('whoosh').models(Bar).count(), 0) def test_searchquery_using(self): sq = connections['solr'].get_query() # Using the default. self.assertEqual(sq.get_count(), 5) # "Swap" to the default. sq = sq.using('solr') self.assertEqual(sq.get_count(), 5) # Swap the ``SearchQuery`` used. sq = sq.using('whoosh') self.assertEqual(sq.get_count(), 2) def test_excluded_indexes(self): wui = connections['filtered_whoosh'].get_unified_index() self.assertTrue(any(isinstance(i, FooIndex) for i in wui.collect_indexes())) self.assertFalse(any(isinstance(i, BarIndex) for i in wui.collect_indexes())) # Shouldn't error. wui.get_index(Foo) # Should error, since it's not present. 
self.assertRaises(NotHandled, wui.get_index, Bar) def test_filtered_index_update(self): for i in ('whoosh', 'filtered_whoosh'): self.fi.clear(using=i) self.fi.update(using=i) results = self.whoosh_backend.search('foo') self.assertEqual(results['hits'], 2) results = self.filtered_whoosh_backend.search('foo') self.assertEqual(results['hits'], 1, "Filtered backend should only contain one record") class TestSignalProcessor(BaseSignalProcessor): def setup(self): self.setup_ran = True super(TestSignalProcessor, self).setup() def teardown(self): self.teardown_ran = True super(TestSignalProcessor, self).teardown() class SignalProcessorTestCase(WhooshTestCase): def setUp(self): super(SignalProcessorTestCase, self).setUp() # Blatantly wrong data, just for assertion purposes. self.fake_connections = {} self.fake_router = [] self.ui = connections['solr'].get_unified_index() self.fi = self.ui.get_index(Foo) self.bi = self.ui.get_index(Bar) self.solr_backend = connections['solr'].get_backend() self.whoosh_backend = connections['whoosh'].get_backend() self.foo_1 = Foo.objects.create( title='Haystack test', body='foo 1', ) self.foo_2 = Foo.objects.create( title='Another Haystack test', body='foo 2', ) self.bar_1 = Bar.objects.create( author='Haystack test', content='bar 1', ) self.bar_2 = Bar.objects.create( author='Another Haystack test', content='bar 2', ) self.bar_3 = Bar.objects.create( author='Yet another Haystack test', content='bar 3', ) self.fi.reindex(using='solr') self.fi.reindex(using='whoosh') self.bi.reindex(using='solr') def tearDown(self): self.fi.clear(using='solr') self.bi.clear(using='solr') super(SignalProcessorTestCase, self).tearDown() def test_init(self): tsp = TestSignalProcessor(self.fake_connections, self.fake_router) self.assertEqual(tsp.connections, self.fake_connections) self.assertEqual(tsp.connection_router, self.fake_router) # We fake some side-effects to make sure it ran. 
self.assertTrue(tsp.setup_ran) bsp = BaseSignalProcessor(self.fake_connections, self.fake_router) self.assertFalse(getattr(bsp, 'setup_ran', False)) def test_setup(self): tsp = TestSignalProcessor(self.fake_connections, self.fake_router) tsp.setup() self.assertTrue(tsp.setup_ran) def test_teardown(self): tsp = TestSignalProcessor(self.fake_connections, self.fake_router) tsp.teardown() self.assertTrue(tsp.teardown_ran) def test_handle_save(self): # Because the code here is pretty leaky (abstraction-wise), we'll test # the actual setup. # First, ensure the signal is setup. self.assertEqual(len(models.signals.post_save.receivers), 1) # Second, check the existing search data. sqs = SearchQuerySet('solr') self.assertEqual(sqs.using('solr').count(), 5) self.assertEqual(sqs.using('solr').models(Foo).count(), 2) self.assertEqual(sqs.using('solr').models(Bar).count(), 3) self.assertEqual(sqs.using('whoosh').count(), 2) self.assertEqual(sqs.using('whoosh').models(Foo).count(), 2) self.assertEqual(sqs.using('solr').models(Foo).order_by('django_id')[0].text, 'foo 1') self.assertEqual(sqs.using('whoosh').models(Foo).order_by('django_id')[0].text, 'foo 1') # Third, save the model, which should fire the signal & index the # new data. self.foo_1.body = 'A different body' self.foo_1.save() # Fourth, check the search data for the updated data, making sure counts # haven't changed. sqs = SearchQuerySet('solr') self.assertEqual(sqs.using('solr').count(), 5) self.assertEqual(sqs.using('solr').models(Foo).count(), 2) self.assertEqual(sqs.using('solr').models(Bar).count(), 3) self.assertEqual(sqs.using('whoosh').count(), 2) self.assertEqual(sqs.using('whoosh').models(Foo).count(), 2) self.assertEqual(sqs.using('solr').models(Foo).order_by('django_id')[0].text, 'A different body') self.assertEqual(sqs.using('whoosh').models(Foo).order_by('django_id')[0].text, 'foo 1') def test_handle_delete(self): # Because the code here is pretty leaky (abstraction-wise), we'll test # the actual setup. 
# First, ensure the signal is setup. self.assertEqual(len(models.signals.post_delete.receivers), 1) # Second, check the existing search data. sqs = SearchQuerySet('solr') self.assertEqual(sqs.using('solr').count(), 5) self.assertEqual(sqs.using('solr').models(Foo).count(), 2) self.assertEqual(sqs.using('solr').models(Bar).count(), 3) self.assertEqual(sqs.using('whoosh').count(), 2) self.assertEqual(sqs.using('whoosh').models(Foo).count(), 2) self.assertEqual(sqs.using('solr').models(Foo).order_by('django_id')[0].text, 'foo 1') self.assertEqual(sqs.using('whoosh').models(Foo).order_by('django_id')[0].text, 'foo 1') # Third, delete the model, which should fire the signal & remove the # record from the index. self.foo_1.delete() # Fourth, check the search data for the now-removed data, making sure counts # have changed correctly. sqs = SearchQuerySet('solr') self.assertEqual(sqs.using('solr').count(), 4) self.assertEqual(sqs.using('solr').models(Foo).count(), 1) self.assertEqual(sqs.using('solr').models(Bar).count(), 3) self.assertEqual(sqs.using('whoosh').count(), 2) self.assertEqual(sqs.using('whoosh').models(Foo).count(), 2) self.assertEqual(sqs.using('solr').models(Foo).order_by('django_id')[0].text, 'foo 2') self.assertEqual(sqs.using('whoosh').models(Foo).order_by('django_id')[0].text, 'foo 1') django-haystack-2.8.0/test_haystack/results_per_page_urls.py000066400000000000000000000006601325051407000244170ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf.urls import url from haystack.views import SearchView class CustomPerPage(SearchView): results_per_page = 1 urlpatterns = [ url(r'^search/$', CustomPerPage(load_all=False), name='haystack_search'), url(r'^search2/$', CustomPerPage(load_all=False, results_per_page=2), name='haystack_search'), ] 
django-haystack-2.8.0/test_haystack/run_tests.py000077500000000000000000000012111325051407000220310ustar00rootroot00000000000000#!/usr/bin/env python # encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import sys from os.path import abspath, dirname import nose def run_all(argv=None): sys.exitfunc = lambda: sys.stderr.write('Shutting down....\n') # always insert coverage when running tests through setup.py if argv is None: argv = [ 'nosetests', '--with-coverage', '--cover-package=haystack', '--cover-erase', '--verbose', ] nose.run_exit( argv=argv, defaultTest=abspath(dirname(__file__)) ) if __name__ == '__main__': run_all(sys.argv) django-haystack-2.8.0/test_haystack/settings.py000066400000000000000000000067321325051407000216550ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import os from tempfile import mkdtemp SECRET_KEY = "Please do not spew DeprecationWarnings" # Haystack settings for running tests. DATABASES = { 'default': { 'ENGINE': 'django.db.backends.sqlite3', 'NAME': 'haystack_tests.db', } } INSTALLED_APPS = [ 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'haystack', 'test_haystack.discovery', 'test_haystack.core', 'test_haystack.spatial', 'test_haystack.multipleindex', # This app exists to confirm that nothing breaks when INSTALLED_APPS has an app without models.py # which is common in some cases for things like admin extensions, reporting, etc. 
'test_haystack.test_app_without_models', # Confirm that everything works with app labels which have more than one level of hierarchy # as reported in https://github.com/django-haystack/django-haystack/issues/1152 'test_haystack.test_app_with_hierarchy.contrib.django.hierarchal_app_django', 'test_haystack.test_app_using_appconfig.apps.SimpleTestAppConfig', ] TEMPLATES = [ { 'BACKEND': 'django.template.backends.django.DjangoTemplates', 'APP_DIRS': True, 'OPTIONS': { 'context_processors': [ 'django.contrib.auth.context_processors.auth', ] }, }, ] MIDDLEWARE = [ 'django.middleware.common.CommonMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.csrf.CsrfViewMiddleware', 'django.contrib.auth.middleware.AuthenticationMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', ] ROOT_URLCONF = 'test_haystack.core.urls' HAYSTACK_ROUTERS = ['haystack.routers.DefaultRouter', 'test_haystack.multipleindex.routers.MultipleIndexRouter'] HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'test_haystack.mocks.MockEngine', }, 'whoosh': { 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', 'PATH': mkdtemp(prefix='test_whoosh_query'), 'INCLUDE_SPELLING': True, }, 'filtered_whoosh': { 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', 'PATH': mkdtemp(prefix='haystack-multipleindex-filtered-whoosh-tests-'), 'EXCLUDED_INDEXES': ['test_haystack.multipleindex.search_indexes.BarIndex'], }, 'elasticsearch': { 'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine', 'URL': os.environ.get('TEST_ELASTICSEARCH_1_URL', 'http://localhost:9200/'), 'INDEX_NAME': 'test_default', 'INCLUDE_SPELLING': True, }, 'simple': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', }, 'solr': { 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': os.environ.get('TEST_SOLR_URL', 'http://localhost:9001/solr/collection1'), 'ADMIN_URL': os.environ.get('TEST_SOLR_ADMIN_URL', 'http://localhost:9001/solr/admin/cores'), 
'INCLUDE_SPELLING': True, }, } if 'elasticsearch' in HAYSTACK_CONNECTIONS: try: import elasticsearch if (2, ) <= elasticsearch.__version__ <= (3, ): HAYSTACK_CONNECTIONS['elasticsearch'].update({ 'ENGINE': 'haystack.backends.elasticsearch2_backend.Elasticsearch2SearchEngine' }) except ImportError: del HAYSTACK_CONNECTIONS['elasticsearch'] django-haystack-2.8.0/test_haystack/simple_tests/000077500000000000000000000000001325051407000221465ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/simple_tests/__init__.py000066400000000000000000000001141325051407000242530ustar00rootroot00000000000000# encoding: utf-8 import warnings warnings.simplefilter('ignore', Warning) django-haystack-2.8.0/test_haystack/simple_tests/search_indexes.py000066400000000000000000000015431325051407000255070ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from haystack import indexes from ..core.models import MockModel, ScoreMockModel class SimpleMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class SimpleMockScoreIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) score = indexes.CharField(model_attr='score') def get_model(self): return ScoreMockModel class SimpleMockUUIDModelIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, model_attr="characteristics") django-haystack-2.8.0/test_haystack/simple_tests/test_simple_backend.py000066400000000000000000000155751325051407000265340ustar00rootroot00000000000000# coding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from datetime import date from django.conf import settings from django.test import TestCase from 
django.test.utils import override_settings from haystack import connection_router, connections, indexes from haystack.query import SearchQuerySet from haystack.utils.loading import UnifiedIndex from ..core.models import MockModel, ScoreMockModel from ..mocks import MockSearchResult from .search_indexes import SimpleMockScoreIndex, SimpleMockSearchIndex class SimpleSearchBackendTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(SimpleSearchBackendTestCase, self).setUp() self.backend = connections['simple'].get_backend() ui = connections['simple'].get_unified_index() self.index = SimpleMockSearchIndex() ui.build(indexes=[self.index, SimpleMockScoreIndex()]) self.sample_objs = MockModel.objects.all() def test_update(self): self.backend.update(self.index, self.sample_objs) def test_remove(self): self.backend.remove(self.sample_objs[0]) def test_clear(self): self.backend.clear() def test_search(self): # No query string should always yield zero results. self.assertEqual(self.backend.search(u''), {'hits': 0, 'results': []}) self.assertEqual(self.backend.search(u'*')['hits'], 24) self.assertEqual(sorted([result.pk for result in self.backend.search(u'*')['results']]), [1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]) self.assertEqual(self.backend.search(u'daniel')['hits'], 23) self.assertEqual([result.pk for result in self.backend.search(u'daniel')['results']], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]) self.assertEqual(self.backend.search(u'should be a string')['hits'], 1) self.assertEqual([result.pk for result in self.backend.search(u'should be a string')['results']], [8]) # Ensure the results are ``SearchResult`` instances... 
self.assertEqual(self.backend.search(u'should be a string')['results'][0].score, 0) self.assertEqual(self.backend.search(u'index document')['hits'], 6) self.assertEqual([result.pk for result in self.backend.search(u'index document')['results']], [2, 3, 15, 16, 17, 18]) # Regression-ville self.assertEqual([result.object.id for result in self.backend.search(u'index document')['results']], [2, 3, 15, 16, 17, 18]) self.assertEqual(self.backend.search(u'index document')['results'][0].model, MockModel) # No support for spelling suggestions self.assertEqual(self.backend.search(u'Indx')['hits'], 0) self.assertFalse(self.backend.search(u'Indx').get('spelling_suggestion')) # No support for facets self.assertEqual(self.backend.search(u'', facets=['name']), {'hits': 0, 'results': []}) self.assertEqual(self.backend.search(u'daniel', facets=['name'])['hits'], 23) self.assertEqual(self.backend.search(u'', date_facets={'pub_date': {'start_date': date(2008, 2, 26), 'end_date': date(2008, 2, 26), 'gap': '/MONTH'}}), {'hits': 0, 'results': []}) self.assertEqual(self.backend.search(u'daniel', date_facets={'pub_date': {'start_date': date(2008, 2, 26), 'end_date': date(2008, 2, 26), 'gap': '/MONTH'}})['hits'], 23) self.assertEqual(self.backend.search(u'', query_facets={'name': '[* TO e]'}), {'hits': 0, 'results': []}) self.assertEqual(self.backend.search(u'daniel', query_facets={'name': '[* TO e]'})['hits'], 23) self.assertFalse(self.backend.search(u'').get('facets')) self.assertFalse(self.backend.search(u'daniel').get('facets')) # Note that only textual-fields are supported. self.assertEqual(self.backend.search(u'2009-06-18')['hits'], 0) # Ensure that swapping the ``result_class`` works. 
self.assertTrue(isinstance(self.backend.search(u'index document', result_class=MockSearchResult)['results'][0], MockSearchResult)) def test_filter_models(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(self.backend.search(u'*', models=set([]))['hits'], 24) self.assertEqual(self.backend.search(u'*', models=set([MockModel]))['hits'], 23) def test_more_like_this(self): self.backend.update(self.index, self.sample_objs) self.assertEqual(self.backend.search(u'*')['hits'], 24) # Unsupported by 'simple'. Should see empty results. self.assertEqual(self.backend.more_like_this(self.sample_objs[0])['hits'], 0) def test_score_field_collision(self): index = connections['simple'].get_unified_index().get_index(ScoreMockModel) sample_objs = ScoreMockModel.objects.all() self.backend.update(index, self.sample_objs) # 42 is the in the match, which will be removed from the result self.assertEqual(self.backend.search(u'42')['results'][0].score, 0) @override_settings(DEBUG=True) class LiveSimpleSearchQuerySetTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(LiveSimpleSearchQuerySetTestCase, self).setUp() # Stow. self.old_ui = connections['simple'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SimpleMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['simple']._index = self.ui self.sample_objs = MockModel.objects.all() self.sqs = SearchQuerySet(using='simple') def tearDown(self): # Restore. connections['simple']._index = self.old_ui super(LiveSimpleSearchQuerySetTestCase, self).tearDown() def test_general_queries(self): # For now, just make sure these don't throw an exception. # They won't work until the simple backend is improved. 
self.assertTrue(len(self.sqs.auto_query('daniel')) > 0) self.assertTrue(len(self.sqs.filter(text='index')) > 0) self.assertTrue(len(self.sqs.exclude(name='daniel')) > 0) self.assertTrue(len(self.sqs.order_by('-pub_date')) > 0) def test_general_queries_unicode(self): self.assertEqual(len(self.sqs.auto_query(u'Привет')), 0) def test_more_like_this(self): # MLT shouldn't be horribly broken. This used to throw an exception. mm1 = MockModel.objects.get(pk=1) self.assertEqual(len(self.sqs.filter(text=1).more_like_this(mm1)), 0) def test_values_queries(self): sqs = self.sqs.auto_query('daniel') self.assertTrue(len(sqs) > 0) flat_scores = sqs.values_list("score", flat=True) self.assertEqual(flat_scores[0], 0) scores = sqs.values_list("id", "score") self.assertEqual(scores[0], [1, 0]) scores_dict = sqs.values("id", "score") self.assertEqual(scores_dict[0], {"id": 1, "score": 0}) django-haystack-2.8.0/test_haystack/simple_tests/test_simple_query.py000066400000000000000000000025231325051407000262770ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from haystack import connections from haystack.models import SearchResult from haystack.query import SQ class SimpleSearchQueryTestCase(TestCase): def setUp(self): super(SimpleSearchQueryTestCase, self).setUp() self.sq = connections['simple'].get_query() def test_build_query_all(self): self.assertEqual(self.sq.build_query(), '*') def test_build_query_single_word(self): self.sq.add_filter(SQ(content='hello')) self.assertEqual(self.sq.build_query(), 'hello') def test_build_query_multiple_word(self): self.sq.add_filter(SQ(name='foo')) self.sq.add_filter(SQ(name='bar')) self.assertEqual(self.sq.build_query(), 'foo bar') def test_set_result_class(self): # Assert that we're defaulting to ``SearchResult``. self.assertTrue(issubclass(self.sq.result_class, SearchResult)) # Custom class. 
class IttyBittyResult(object): pass self.sq.set_result_class(IttyBittyResult) self.assertTrue(issubclass(self.sq.result_class, IttyBittyResult)) # Reset to default. self.sq.set_result_class(None) self.assertTrue(issubclass(self.sq.result_class, SearchResult)) django-haystack-2.8.0/test_haystack/solr_tests/000077500000000000000000000000001325051407000216345ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/solr_tests/__init__.py000066400000000000000000000002141325051407000237420ustar00rootroot00000000000000# encoding: utf-8 import warnings warnings.simplefilter('ignore', Warning) from ..utils import check_solr def setup(): check_solr() django-haystack-2.8.0/test_haystack/solr_tests/content_extraction/000077500000000000000000000000001325051407000255465ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/solr_tests/content_extraction/test.pdf000066400000000000000000001360701325051407000272270ustar00rootroot00000000000000%PDF-1.3 % 4 0 obj << /Length 5 0 R /Filter /FlateDecode >> stream x\Ys~_:p#Nؖm9J)ٖv50%eR1ϯ@og歩z?/Ǻ/M|(|o>ܔf1۬vȨ[ZriTM^[[rcЎz8DFya] ׳]<]m?T*4CWCՙǪjOk#Z{юt%H8Pj&VJ“B9Oxm]S֪Ǽ>ؚI~ȻiLU歽\m 'e1WycM\1mSE^ӽ*Ʀv-Yv il)jP:6fP׃YeuguжtE{M7cє=Dve|@Bo9. ,vZo])n #aN ;_ON]#mSJ"Qd7sKmksN *Mә7 JA_'_j0EoVД ={[>/o61ŵzyhnh>:wvSiծ0wpnI9N\: N+JfE^:y_l&[;|wj߯|ۏ"Nw-=6dW,Y&J E[@EkhYMYfAu]uyyXP+mQkzjyp*$5+ȍрľZLDCTbeCgH%anJh!T;$A Ї2y@(9JA]!,AM{wB qWL<6kCӬX [QgK7[]Q}"L zÒWBIH5ڀ أ3EVuEiH7`#DN"n Ht6 ) \6iauK`<^nCd-&93 :F"6B<6e 4sмզ[ pNr:0aMR4OfG?`S< aP` rB\ v[zo`F0lV},$bX_hE;9b0TE|8^?3 k G/c03ΣTh8/ѕYIͤR=#.<`U!($dq)n2 >ܩwX(lSQ7{ wLfqިTހhIXÃĖW &g~2R-lvt??yX Nq$͙jL,s 4AbVE/PO1O4+M~Af V!eREij DEJmk2piC`r[DkT\ׄpY! M& 'hY'2`FYB 0P +ӣ/r rADD% 22Dm(CxէRwlQbn 24Z'ϬM'0L0? &4e ! 70ּ*J XE)_6 -JtSY@Eh,? 
bMELQvUd?RYAr݅6R:*6v/FC;LҞJ|oQ)TT/ʎ*l*ڪl]PUګGգO>{~z9Y;_ޥ\6c/ς-=b p$ Q=]L3Si)*0{<τ 070ITCܺ=CM3주@*&C>D΀;Uՙ" "VBk N$|],6W>%R 4v`^wMXpAnB`L Dr(t'G\{@றqz@V G$>B<`6;#-hbe%H@$""@|f4s8l`_\xy1T}Bid s ZiŠs&9Ukav6<iC );:2 5q_1"cc \-[S9g1,lr(!%>AHJu ͗~?у}.At^ kԆEq(PTO(e+[liNtJJrG6'e3LFȣ󢢃b쎲%#t~&Iw3u-ezmWo-8`XS wЇ)zPNԖSoJdET=6DžM}j!se@>tXw7E;RC7IzEͭCrV|NL֑eN2Q*_y`V) =4Ѣ=sFkXZ#Na6 Tpfs`ݼN c bA6W x6N,=d)¦vX' }]./zI9}l;(A"^䵧p okfj>iy}SH͙40)-QƁBgl=4LB3s|TDg88c1-!)+^$ AQn!(Kp %0EvQޫE}. @-vp̀`E0uvhφx*Ēb?4ጨimB]B"tk.Mq<+iPg>'Ch8YoN"aAx-XtBXu#70h@h<&)dDm`Z7λ^Wbc x;8 0z h&Gi|t8'H3lGa+ G{ -5U@P\@>ʣNRҔ{ TEVi $ $O .k [Nt|dy&P*Q {8CS EU_6m7}֌m>({Lf 6Kz5u1?;(dQU7ϋ3l5f޺HKlJ_V/~՝S _1ί)~ /L^+fz7b ^KmFZS`i3{eKؼG}fF˫GY`tζ/dvA%+.wٔ lL9?K"hi+N8xYZE+z lLUtGOY\ypU皖$m%Ix1%ȻCr )IGޚrfi7UTM1I.RB6NJ>fZ"*J챎7d <@[NA~$׃`K \'GɮGOrQ[Шi0c5-R :PA/O~ 6A (->1Jڃ i{f:=sRz$O*' < 圜ٽ`̡sv'>k7yrp$L1inB07\,ދR-'A8I-CYZg9R_/LzB/a Æ^ȉCwώO~_sWuIjo&C'7[Xxtpq y0PI/Ф7eД+d_)&\)K-%eЊ-`)î"N2hq wUh_aRӄ*ŭk rB'T{%!1 iPMdQ{T0=+ iN"c\p!Me#N{q'><8gßS:p/_Փj !亅RK[vicFvPl~~iW]I cV\L(e;63f eb٣v}"d}dɴ!v;ۑ0a@M ϒ%"N%mP$tzzyqx4犯xBϲ ̡UVDor;DF œ 7ҙNҏYRݺ톎_R LN[{O}GՋ4;DrejwnFJǦ?n>osڴ{T-gTa*/Sq>5ߘ2"/[Do㈘e 48 Oq>! 
endstream endobj 5 0 obj 4623 endobj 2 0 obj << /Type /Page /Parent 3 0 R /Resources 6 0 R /Contents 4 0 R /MediaBox [0 0 612 792] >> endobj 6 0 obj << /ProcSet [ /PDF /Text ] /ColorSpace << /Cs1 7 0 R >> /ExtGState << /Gs2 16 0 R /Gs1 17 0 R >> /Font << /TT2.1 13 0 R /TT1.0 11 0 R /TT3.1 15 0 R >> /XObject << /Fm1 8 0 R >> >> endobj 8 0 obj << /Length 9 0 R /Filter /FlateDecode /Type /XObject /Subtype /Form /FormType 1 /BBox [67 31 545 218] /Resources 10 0 R /Group << /S /Transparency /CS 18 0 R /I true /K false >> >> stream xe;n1 C{u ɒ=V9EIc恢H=qUQX okB_vWJM.f&Jќ$K"/c9)37sdņ}Xo\$[JbMwHmx țI[gcIJ%R GMTDUe]99Oү-HHU29LA0#O'?i.ta"L~7ͅe endstream endobj 9 0 obj 231 endobj 10 0 obj << /ProcSet [ /PDF ] /ColorSpace << /Cs1 7 0 R >> >> endobj 16 0 obj << /Type /ExtGState /ca 1 >> endobj 17 0 obj << /Type /ExtGState /ca 0.498 >> endobj 19 0 obj << /Length 20 0 R /N 3 /Alternate /DeviceRGB /Filter /FlateDecode >> stream xwTSϽ7" %z ;HQIP&vDF)VdTG"cE b PQDE݌k 5ޚYg}׺PtX4X\XffGD=HƳ.d,P&s"7C$ E6<~&S2)212 "įl+ɘ&Y4Pޚ%ᣌ\%g|eTI(L0_&l2E9r9hxgIbטifSb1+MxL 0oE%YmhYh~S=zU&ϞAYl/$ZUm@O ޜl^ ' lsk.+7oʿ9V;?#I3eE妧KD d9i,UQ h A1vjpԁzN6p\W p G@ K0ށiABZyCAP8C@&*CP=#t] 4}a ٰ;GDxJ>,_“@FXDBX$!k"EHqaYbVabJ0՘cVL6f3bձX'?v 6-V``[a;p~\2n5׌ &x*sb|! ߏƿ' Zk! 
$l$T4QOt"y\b)AI&NI$R$)TIj"]&=&!:dGrY@^O$ _%?P(&OJEBN9J@y@yCR nXZOD}J}/G3ɭk{%Oחw_.'_!JQ@SVF=IEbbbb5Q%O@%!BӥyҸM:e0G7ӓ e%e[(R0`3R46i^)*n*|"fLUo՝mO0j&jajj.ϧwϝ_4갺zj=U45nɚ4ǴhZ ZZ^0Tf%9->ݫ=cXgN].[7A\SwBOK/X/_Q>QG[ `Aaac#*Z;8cq>[&IIMST`ϴ kh&45ǢYYF֠9<|y+ =X_,,S-,Y)YXmĚk]c}džjcΦ浭-v};]N"&1=xtv(}'{'IߝY) Σ -rqr.d._xpUەZM׍vm=+KGǔ ^WWbj>:>>>v}/avO8 FV> 2 u/_$\BCv< 5 ]s.,4&yUx~xw-bEDCĻHGKwFGEGME{EEKX,YFZ ={$vrK .3\rϮ_Yq*©L_wד+]eD]cIIIOAu_䩔)3ѩiB%a+]3='/40CiU@ёL(sYfLH$%Y jgGeQn~5f5wugv5k֮\۹Nw]m mHFˍenQQ`hBBQ-[lllfjۗ"^bO%ܒY}WwvwXbY^Ю]WVa[q`id2JjGէ{׿m>PkAma꺿g_DHGGu;776ƱqoC{P38!9 ҝˁ^r۽Ug9];}}_~imp㭎}]/}.{^=}^?z8hc' O*?f`ϳgC/Oϩ+FFGGόzˌㅿ)ѫ~wgbk?Jި9mdwi獵ޫ?cǑOO?w| x&mf endstream endobj 20 0 obj 2612 endobj 7 0 obj [ /ICCBased 19 0 R ] endobj 21 0 obj << /Length 22 0 R /N 3 /Alternate /DeviceRGB /Filter /FlateDecode >> stream xTkPe:g >hndStCkWZ6!Hm\$~ًo:w> كo{ a"L"4M'S9'^qZ/USO^C+hMJ&G@Ӳylto߫c՚  5"Yi\t։15LsX g8ocግ#f45@ B:K@8i ΁'&.)@ry[:Vͦ#wQ?HBd(B acĪL"JitTy8;(Gx_^[%׎ŷQ麲uan7m QH^eOQu6Su 2%vX ^*l O—ޭˀq,>S%LdB1CZ$M9P 'w\/].r#E|!3>_oa۾d1Zӑz'=~V+cjJtO%mN |-bWO+ o ^ IH.;S]i_s9*p.7U^s.3u |^,<;c=ma>Vt.[՟Ϫ x# endstream endobj 22 0 obj 785 endobj 18 0 obj [ /ICCBased 21 0 R ] endobj 3 0 obj << /Type /Pages /MediaBox [0 0 612 792] /Count 1 /Kids [ 2 0 R ] >> endobj 23 0 obj << /Type /Catalog /Pages 3 0 R /Version /1.4 >> endobj 15 0 obj << /Type /Font /Subtype /TrueType /BaseFont /NFGLDK+Consolas-Bold /FontDescriptor 24 0 R /ToUnicode 25 0 R /FirstChar 33 /LastChar 53 /Widths [ 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 ] >> endobj 25 0 obj << /Length 26 0 R /Filter /FlateDecode >> stream x]j0E -E, PR^A~,,)tqGW#f$n{MK蝷 g֙fi :d9eih|?ʄ?P2qOvޢY<|۴^B,dVR^tx< U;>@dFKSІg*)t3_n]۪bnR ) k(Z @׬[(j@ =@)UJ;(@0P=f hR@S=`s]0H7AgŬ 4k8Ŭ jQL$.?\bӥO^_y0> H8 endstream endobj 26 0 obj 349 endobj 24 0 obj << /Type /FontDescriptor /FontName /NFGLDK+Consolas-Bold /Flags 4 /FontBBox [-133 -240 630 952] /ItalicAngle 0 /Ascent 743 /Descent -257 /CapHeight 660 /StemV 0 /Leading 171 
/XHeight 495 /MaxWidth 550 /FontFile2 27 0 R >> endobj 27 0 obj << /Length 28 0 R /Length1 12684 /Filter /FlateDecode >> stream x՛y|TϹdd6Y2,2'$f% ! I`R @$Fl@)ֽXk]̀6TDRZWTd>>3aX۾;w9s W'Z2Hd$oU"}Y .  ,ue0,!jòbq/!K0L Ci0y+10ru_81z _ ®{WclVo+4@qч(rYEB!LzI∁dBTCYt=%_JTv}Sh~yU,rO'>KCL.Th‰ܹZ鼜+w/o____ޛs͝Njnk/Rj5qDM_^/o.9#A"g#jAGDV!*r(C"f!J3ň" D!C"rو,D&bz01Gd |tD""9Jxq6-ě?!@:5ī? #^A2w/"!^@^P4A+/A=zt?>A~Sݠ=}@nt3&Џ@?KQyhv$R}3qWLL8N:Y5΄NLؕ'6;}vg`7᠉& 4&[kX9b+QkqEM+M@=#&ii$@^]ҍ&K|ːe 2bӱTCNZfHN=פ֯OzCp" k:+gCtoe&]NKGJ- BFZ}Ѩy yH=(tE޹00B#l -X &Nhٹ H@u-ؑ:߀"8  פ[kW!(%%`ϻ!!KY^!0I%ocP)rFQ $qQkk,BD= j.#X^H(9^STNjpwǪx(l U|KsW @j*H{\b͆=U :ThT gb*fRK.*]Vy{-*!őJ.QY)FIuVf5qxx稁,E/vDY/Uo>1Ӯz 3jCM] w A bz1J Y$9A n7kˎQ,8,r8R#)S{xl5_۰*.v3ҟG{ %ʼ=3UC%i BacVdeB fɯc 6D@%V" 3, a@VOr'O{sI`͕op`Rc_s+`wBw0/1$\>@ܑm1ʣr8y "\5p+J *1c*-|&V? $2JRx*X{Ir]$5Zٕp0W0lX>f1Ꮚ/%"Ǘ2Z`e, ,>qFaAa vC>qNNn]΀M@ᩇ|z z8AaiY!R!dP:\䨑ʰ7@ .XZrDR۱N](cQ YEg;FZc \ V[֣hhyI}=.8DžT1ʽ4p"aݒy1Q=*?v_Tu^ mgkD-{):Tۡ,Q2EhRQ,xzaZgD C]*bu(h6:y?Mǟ;ľ64B\g.U1\""_JJ 4Z "s1̙@W> }Rʾ?n6I|j$zC]2Gt]i|êo\cF6<]HǸL.^Y> ?(PW|\|*+#wU'չn( 's`Sء(₫v$nzH g0r,]&4 X\]+{wJR6bٸ0n*|8|sqQcQ,jt%8>Mx>?7Xlt.?ca\hb>יR\9"9ctͩёj68)Ư?/QbsFC2' )-7@c""=7#ѣ}jD NEb*a+[Ieee"8i8>kβ $cvVM-Fc{/tOI:E?0e o銳ګ&?.בdM~nY|{,Sk^kr*Hnv4Oо2lMEL#CŎI;):ٔr%&) ⌐bBJi9wCsڬ2g-|x^ݡekYs+- ^șRaM<ͿfD=}š^,-f,1< }F8-%lAoV 1ʠaD'oPS 6NF3'6pxC Nk sgw$Kז%gwW&s/l/w'(:C=v s%CUZ]V`ɏdOU'Ah;١*<7R  42:h^Paw&NMJN=*u?+=(S1[^`)W({MݸM} JwWOIo\Y=ڑPW_ut8ø3i5u->~M𓙤>~bA<,lhSi%?ghN`]pХҀeUY/e])(4bdjnGsYk-5:E?Nڡ;m/ר g7my AtmrsEȲQNt>&{?Oh E'yϱ)&{Y(wCqLAuQ8@D&XְBpaJWUU[FVoM*Kڠm|dKZ.⚱[Ty}BIGqfA]q+1jPnjr1I!&>aRO죔&kVX~`E9F-ԜFjH)g2޶gUkdzSezUY/4u=791j!f(d)2s 1) NQČvfF9.)q0, c[2Æ G*^ \ ,"Ai %Xf͝?mvQpmG <ٹDo'{ʛ$3޸$y50QV77 .9OUo쫿SR[̗'rfX=+ڇfs3għp=N'-ݎnGn;l%+V X%*KKJϣ*i-c߷b1OEQ v/>7ӧBsS߿dVPQԸ\21fnhg஖H73)g }zQϹ=Bsl6_]aec玎dcK+H\?3Rm6ͨ6+<-ݲ6Fi#jݤn}R'aJ̣EϋCg=ilvfTr+f'nlNY}y({#6bAxpu#>3EK#<;&>SjF+Wj!g5rylHN,gS>̄ݮVtK+ͪh1Qï-:c:x8cϒEG#HodZgOVOz͊C&{qOk^t;pIKzόhm MsWֹoF>.O)lXk[bIdYo[KfqasSdg&.km]{N%=C.gpѵ#<žٛ!W`G~mzjMÑWC:kRg;SjNRbI̞S\}+{+V)&T 
endstream endobj 28 0 obj 7091 endobj 11 0 obj << /Type /Font /Subtype /TrueType /BaseFont /CCWAIP+ArialMT /FontDescriptor 29 0 R /Encoding /MacRomanEncoding /FirstChar 32 /LastChar 213 /Widths [ 278 0 0 556 0 0 0 0 333 333 0 0 278 333 278 278 0 0 0 0 0 0 0 0 0 0 278 0 0 0 0 0 0 0 0 0 722 0 611 778 722 278 0 0 556 0 722 0 0 0 722 667 611 722 0 0 0 0 0 0 0 0 0 0 0 556 556 500 556 556 278 556 556 222 222 500 222 833 556 556 556 556 333 500 278 556 500 722 500 500 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 222 ] >> endobj 29 0 obj << /Type /FontDescriptor /FontName /CCWAIP+ArialMT /Flags 32 /FontBBox [-665 -325 2000 1006] /ItalicAngle 0 /Ascent 905 /Descent -212 /CapHeight 670 /StemV 0 /Leading 33 /XHeight 530 /MaxWidth 2000 /FontFile2 30 0 R >> endobj 30 0 obj << /Length 31 0 R /Length1 24688 /Filter /FlateDecode >> stream x|y`EUuOLsd$3LB&H!IsP$S⢆Ⅾz !`@}⊊QE3TM߿驪oUWw񽫺W\H1oD]+.]qٲ\޳-]wi.o.\0w~.O' H.[}e.=|˲W< s-o+Z9m=i#!<`݋;?홹 BQ|EωJёAP. &yίn95YOs,nЇ%exI^tb!k \L #urztQOU%Ӥ]@RrI}SRT>4btJ]JRwqrd2G*Ym;#FPAF#lC8HR=чHA\D|EHqD9#lCPD=^Z'bH;v"ٵxieg7쮙Mt\:\!jsFҒ\NT4{>ɇN){8)%]#i&ĐܻK2(O"m*k,˾$na_ֵ6wa?ca~C_"((q#N.@G a?—*{1FG`ouWt믈#HS{Mm^*zHz>$fTD*=1 )E:هٛ$В77IaB ۀ&m[#eu({ Ianc:١p={1oE{E߈wH q5J{a <5:RiΛدw#.#A\P0a ϊGܸs5pOE(yL#9QrymnK2#>dyN@IHv Dbd1h 8O؏BV T* yZ>7r%\%> z hnnD\0—YЏED\P0Z/ќ/F#M!Va"ϱpሱQ>V=Ltba2:}op N 0[zۿ-t{ۓE{H $IHU"?ͼSH+3p=YG=o";"vʴ=Jy3|Swfh+k)mH8^w8?(v \34G9AR׬ZC FC5|*rq4n=.]6u\~ƝҖQM(6tf p(Ig+:^fTW\mB1Fg/vQ&ti `, YKq! 
\EStUx?Lr%^VzbuO*JԪ!]XN B1O<,y> :a1A3]#dN9BmG'Hџ{z@tocL>g$4N͖lJa*&d:Z~ͮq<:;80.w&!|ZA'[~!$fA;S3 ),凫@q]va9VKERJ(IMrjn9_>$g1ΕmMO^4Pl@ƿeeHL{#BzCX ADm|ZF 12sbJH^O刺m}g%lga~l &⸘-`PdmvZR%2i,-VK뤭RZzCzWtJGV\$'N>"O /4fL}fʶp~1  ff9Y3J곷dvG.z O+ U lgv}L>P,.%䗪US4#Mɮdan(5?7#On;n5*׷8*&7uT)xop, imH^psC˨tU@[NvjVV@i|NF Cv2bt0Kq)0w~zƆQXoy$MUH22DAI;lS'lnLKsq+J:>C'tِ!(+o޼)>kC1~&ײͣ[0S.ft# 2!z_NM,-`j7ɔu|co(onC񦹣;duF43}wt8{\`=wN@:M9;1~>4tt^-iOy`0y7]r7}&33wߝA`t :>3`tr8VͺIvm3Råns\L$]J緈wOu{u4\5jGNw~7+9/7ő36o}L$/0lTgMm-жIiV/v@t:S:v|n85Mw:"=m侚'$*euoTy%Wmx~C([7#_dʘ'3Ȅ)$#I:ƨF눆C(!a--$']@RcK.wUZ2hP͞fVl99>8"Dz$㍭5qvYTs3wA~-ORB?FDs8F}+& y^`DrP͠A/Oͺi Wqpcf'߳t37L .$s2'V>3h@ç~uW8u#Bn5Rl]J$%1DYXUdX@٬V1J:;\ ks `a1Vڊ:pV Qǥw'=Lh'!o(ЋꖊIg Q>e*B+T,&K`WI @ČJ GČ8ile[ -)HF5@ &]Jz@껐)͹aؒz"/I0{A*{=Q"S"FV`ׅb |;DN 4Ӂr vWU>ڏŋͽn .dn̶]t_QzgrG=7xEl&0V4>r^z_ަ:.뼫񓯟x㶗3ߴͭ.#"`e/c=yղThѶk5QUU:_a$`H5G!Vά9SΡV_ZmQ;'[+Ц@vS)AyLqⰶ x0T (Gȋ/v+}ݏYG]1 6`$nN;̄V|^HsiO.'riAa. (QpvH4+N' brGQH$>eb_LbtF{; 󱽍PԺ<sRrѴhq+Eh" ec1Ge*w Gfz;eʿ0ӏsl4׈Xѐ~-1Ss.zN?M:Ǎ jf$-2c,KQE @*ejZi:b]amfpAd_zڃc8NB3 Updz!H]~MWS&GQDMsQi0k{FךXYXyi<X㵪Ë{< r`<~3/vO&tA]*}~1;AhLL= "s[=(<+ryYD6"VE\ kGS`0͹wgHO**v|AIޓ:{Rˮdol['pi߮{D5V:[[5RUͺ1 o%aAg̓ɫղ((ՆGJ4DF.*5 fqĹ|@(PsP9r@Sh^Ԟ,fRIb  3KKzY_Z_S*qz}V}cu;[[ so%ݡd%ه& +$0ˈM!J} K4a}4l\ Z }PGR@sOdk+rG(vح֠Dqʠl4 BRQ: Nt U`) O_HhK}:u8l:ߩO~%D)>1{=DNgXxthR`{4ͭB'7$ Vd*RIhPON4{hfE]]{i'4]uÙo9bը,Eolp|ۆ&VU-){CV1?$>pEBv+ĀC"%D**Q*`bn*D+mV!ܢ0J0I J.;}kǚ? 
}f<ys&d?}7CK:F̲3>$a<[kö_Pݹҧ L»,)Nء&,9@V5q!͑jzsP N*f4Dq-/Ϝy+^o1n给?&~׾%/rܲ< (-eA{~Y{YY}P^MhHeEe-7osgOJ#q<'I >Y'\C{)(k)Ήz Zhp>LXQ`?ͭ]9 A?@"lڊ&0v1n-0#dx//Ǔ:@n †{x~̪V5xt7$/z>+-ZP食8aÞ_.{^Skq ]ywKz-M< ` hcFM6gid#,) WGWDD!:_] Fg}c;?s F`T7; ӅIKMG )mzCV jya5P|Ju=*:VVсb 6 CBUX `Ĩi] #`UR2l;MTz$2H ;B n*T8ֱr%;ZW/ BN+hohzQB"3VIyŇTQU:x%UT^{d&f%G+{혯Iv(n<1ٞWGM34mwj26ߴ2>yG_HP/Isriޡb{?6>5G{f/|;MO:t'9iU]XasTQp9~Eujq@'rrs5g.cVvjQ eG/gǵ]_=fUODUR  >8ZbڄQՓΡ]gp؅鬵d6b0܄`9:/xڷ,~󺖭OYr73oÙ!#olh!,s6GH8.fSeuĴܲjRPt1Ca1ʗOvN u/˟R2;бai'%-v k*V0Xܡw{@~߄ >a))N%AnW"YSc8UŪQ\V;Sp4avr3Xӟ*&ϸit{o} 7\(SOFB` p?=`nηA9ǣ4?R]`K6gXy,xKMެǐ ;[8`p5OT8RР$hA&+tR^B8r>x9 u4| XjÁtSدW#g0v|YS=7[}]O].8Kq)ŌE6J|V :!'t^V5]۶y[{)uIk[.KA#2@.7ZVܚ^hm*`A5-Zy/P 9KŇ\X|{:(6O}hXCii}b)9"eS:ΎҰGDr a~uJS86͗Wa?1lzXO &4Jl:J?˱5^ f/[q'*j'MH~!άS8!8ðY֑S6^LwSc&(aIG)B nFt, sYFǗUstro 8_O\[+o̟?&}3{^Jo)+/qq6^:w/pR5zb>,~J&JaRYbT Z atJEbaYng WM$%?.`K"j[aU}'|lo/d&rN_m8(0(vIAA *ͩX~OAY2p2!o EM&8r 9t(5Pl!j7. _'@B-T`FZ <צkո5K&Z;y{{pOvus1Q8O%-x&ZX[Җ-',*D,+,mm=EG-Y@Re&YJv5a"6yGe|BfDʇ圮̦7Yʂ\1|gh| .>x_^ٚKȐ7vtt8tLl fmk}&;tW' F}Js-% jnέ0;̺3iֵ89FUy@PA7td ~÷?q3_?+pAbes!,"YPKqqCKWh̭D=j3ֻKQDR7<(z(lLWkԮhGQƕ0*-&IiT,WLY^Q(S>UV55 f2ÒbFb6'T@wIL1+uIQ͠XxӰaˌӈEu \j(@a ؊c8'bƉd. dt?ܹJ:Iׄ[5PON o12ai7I"qaÃza_>{KyA\PP {ɛQu6V,=G>׮<9HvZ{V]><:3eƜ#I,#qB铟fei:{>\uj~$0ꁹ\Z[Z57N,n3o'":a"6Sք7zwt0l#Lv.N=N\1:X5*4nû Sژ`_e:+$3|AQ9$c^,?229BQ$ JW)Tv8A$'X|̑.wυ+aM, qٳqv 62&|cfHl ѐ] gq_.~!ax4!xw<N_'aNXۇ&B^kkK?~i9ix, ':镻~́`ʱUw!=AfXPA "rWPۼ Q=WP.<=<ʉkG+|t{"׼'wg[qWG 7 wOsI{Kϗr#+tpZ:$;O<'SQY{Ál@ *>fwgeږUh[t=ږUnkLdvUh[Pж?evh f:!.k^"=Umpr= pH{ 90p||+p~tFJ~P bm,wja>e̚]\zލujI[qN[,s} ^\1)Кw[keKƮzLN޳aʫW._67羃Q`$HNY_-V8'ܪQƚg(M˔Efs>=70Рs5ff[fߔ22|}{o~ gQLiiEmMe- '$@V}Tqr O σ8p'Kg|J(&67{\"H…C-1pK] |n flc.{زL5M\b"s+z%Z54&rQ4ꑛ~W?n~?ӵ} 6njNjt%|}A o^ ڔY$0nRH/1n}q\MGY$/̫,Q"%jA|mvhym̿$t 'ﻁwTx{h4唞(ѱ? 
[]8\uN5Ck49*0*zdžs{ML$͎s)4;>q>jbU!c(ܸQ.\g7>Q."4dcpM^rO/S>KsLM sፇy'nztO=z" 'O%{Ù37_8_̩plfts0dfe)VƱu6>>8.p{S/;43/RVcSg+f|66-biwIe7VG8*N; g) GD=pD WB WK>*XWj1'3GE,| 0U`AƷ}a?dv JrvnW/d\k~Pw+W=c횇3yڏ3=z3o%FL+1Cujy$^T^-+b=.Hfj$A4K35E=Ê\w:Dz!(bQ')czWہ@"|r%q~6'wWc5i^wtG 5]1bo|uJԷ~B=VvbK~'rh˨E [n\_%şWb+mmͶζͶ/6/,[;%ΒdqIqAų&ؤS۝ %+gIDzx/P :|D:u8Pw kgK6MΏ&dk|T, 'wg0\|?(GYMB h .n23BukUjJ.XZ y*"- & r5/AWoԂ8I.W 2\;\{TɦdZ/@%ldo_L3 ͍2.AވhS0`h3ʛ Fp@=nƢ{<%d!?WNYNZS^0dZ7e@)ƿgfoaߤKw]R!jꋨЋlfFiESR2^A ,@s 4Uڰn_3ޟX |Q_9W_Q?UsvM?YO_oewLYmբ}蕫C /Y`y@ Ƭ+SjV\5|Yf>8+3݇wװGZƓmAlvJħ[RN [:"l6U uڦnQeiVppRc7P]9~@sDNT$@턏{$dxqu3洪J7[S @2'v0=ºK_ݻ=Fׁ_[|;z|| NyhQϊuT?7'V͙60;ɜ7壀|7/ NO jS|>Gy4?VoOg| qMG7Jۜs[mb`˄\]j@qIL/⯗m!b7r.@9UL܀~A%(կ'ɓo@ebwvuSo]J H:g=&3 U42S1g(Իw5Yف&J\vWffG.`"EX5)EqO!BqMi?iCJ-I ƒ1 |h^Y$W+:+MtAIѼsriyVݩF;B>'jgIħVM$f1n_ To?m:i89ֈpeB"JdV LLz5| X4,ÈhPYf)E"Ta"a3\m hȰDAnW~9?u֞NEl)-"SGeؙU8(j:ȱp'(K>u6H6Qa9%]>a<)ţ,A R ^jU搵lB-3d:<1Jx8HSK딫>?3RKww0q_a|t18{܊2d[߱;?qTN[V&b];EAېpn2p;n9TNm mp.eqCzJj)4y6۵Ե޵٥4ȧ#715X\}o'Ciq,X3a&ah:vwf27tK5#KB.;@,80 k;=4Ulvlh|lVUN:nw{JSO]w -:Q˵kR'nX&r׵.}u: hnzs BIO67ω9Y? 
\ǐl/!&Mza\:2EEpdw(I{~_Tl7gT1| _Gw\7U:ܮwl'پܓ\܍_׹GwiQ9J즞5 {ܵ]Mrla!P?9Sst\}OUO6=;2=`0?;z]}٥g;NoHNӿȡ<'*2`"%Y.nĻ |Ø*o5yt@98 _mtujpϔ&kzvYi5-?ܽe+΀sۀ >vEJq!R-(Xu:v ٭M'sj+ {^E%lXI}԰6MۯiιCdxP&)mt#*]Ê&b,wjɜ?++ŹEī=‚Kx/Mfe@_K E#PP {V䯕xKKx{jiQւEpNSOsNA55|uH*Nz}.OszGd>e4#dlݿ4eѯv|ɧ_HubӭX~ `E*A,#N]N|IwbY;ùC y =.JlRX_ӔȽȳ(o־uͮ[ܷxnޫ=n}^εn[o6\؋Q>5cHlE09^ ^'ּ8m` 3X-V03|~cax;4Z6l{;=NZDB`ri&ڤIPcW\z0F ^wIpI ':{kW~@ pÁ(KzpsĆO.Z9:kjE5yx(o6&OOr~U0Q-w%M̲MERvd/~Fu'g\}ߚ ײ%g~cDT,>~7]W?lʾ2ӟ4U0PcJ?/PHo!_B0⃶"<;Yg?,@`\VE|.*6p?򆡓A^ULƿGm_L^J@A`|&cX|2߂O&`m7M2IiyuƿO> ¿2rOJ _hvDG endstream endobj 31 0 obj 17328 endobj 13 0 obj << /Type /Font /Subtype /TrueType /BaseFont /NLHSGB+Consolas /FontDescriptor 32 0 R /ToUnicode 33 0 R /FirstChar 33 /LastChar 71 /Widths [ 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 550 ] >> endobj 33 0 obj << /Length 34 0 R /Filter /FlateDecode >> stream x]n@E|,E1XBHQH^, }}NS8yûhX^Cyliq]?9]&פS?dž]>̞zV4o%C7̜[7&}ѳosLs?ï=9^w:aq>*Sv>'C/Gyc":1TiSJ+>o*+е@M NE;m T{{k\- U# lC&F-P`+@^ *H*VJVAfdsE*ߘ5>JV}Yﵩ,/Wdȶ"kzʍj1YH*C YUSP".y53cg% zL> endobj 35 0 obj << /Length 36 0 R /Length1 15860 /Filter /FlateDecode >> stream x՛y|ELdA$Mr1ILNrC ! " 0 á!*(zEWqW@`TWEv{zw>=Ys,&2I8a&Q!Mmih M3'O M6~ $ƆI?i yfjNkY_6cJڟvvxcIs'!{&$b&#YqT.2zƘF| [_>ۥ&h~?G?8R%:^ԥU=RFUz%vIJ=$ t%WNa sU߅"?,o*/,%a. 09/9a~]9+|'߄VokaKasa*_LO9#'|,ia>CaN 0}aN 0 0'y[0GyK? sXCMa0'^a<¼.av K´ &[¼&6a EWyE,Klaf0yV<#:a)a a1a 0kY-*af0<( a 0˄_,f0 P{Y .5F͐sr4WONinLT=1eBuCz}>^-}|uԘcc#JΩU)9P%ݠVWV,R,b*E))y)In#) :k=<*߹\O.v\.q'.rˏ\\~r\7.r=?k._q\>W.O Q{@C.|\r{\\r1.G?q9\\se/O{h.*:=\vse'v.m\\vp5.۸l岅˫\^2\^ⲉˋ\^<\6py˳\sy:.Osy˓\8Ǹ(5\VsY.+<.\\\r e\pҿ=xL.\rqH:4v+fv%=(D. 
l%KX.(\bDs[2{\"%K80.\l\Bsr %??.%8=- ąr!.:Op~%p.΂wo[ _ 9+ | ΀OQGCp | '{]81po?u7^{n oBk`W*x 6&"x<6 9,X|gD= O'1< ր`x< tfpif0Z*G&lQE3X]>_[5F3tiNN-sTLz0__:ge"VCK(]VUTwU ect;uqVI5m>RF*| O/[D數#+k#jK܋wTy#=y[n_ߖl@>O^ ⯐-hgdAڐZf;:\gK?K'FH[YRvCf ^-j=ӭe-TcH%Pc Ʈ]SZĂ]/jʯm} xke^%[dC^Ǖoǵ#eDUĈE5lq#*n4ض߶z> o`$y^8WItY7Q-%q9{KNGj3 @Yh#9Er\F]G[]e픂 Eke!n?nښʊґ# Z< ?/ו38{Yef8$DZ-?Ǡi52V/ɅJQW)),4 ᦌzYE,㶳%](tKI/QȠd{bw,P:~eRkwWkԄ^\`wz{df2+Ɣdf4ܽm`w6irla$wYyMaADttGcunz,7L%j}c&|')jrvj [[-I0o`;Y)(t')XIōP6֬[#rj}SNCw.a%6OP7E& ^\^v2!Inm9$W-Ŗ+lRX{Ws{{J2ZVukb'Llfت KRUvYؖ@z\C*yRs%2Y;K#U aPGuXݪQ3D%j/q6q$&rC^<ՃTca9tb-8 u>G@"uD P1ŵ$vҚ)FvMhv~Lm;5PIgEL|P7RYs"0۲+lXoG hJs>Y!&Qѽ˒cM~FƠ"(8mBn e6 eurT=S!ݘYS;3?߄2IJ,v#'ӝ.4u 4;.Z ߙ"ėEI<| !$(|, 1XI$;#A$ P^GbHO4DKz{XY=H%8&YǩLЩm)FzHJNhzh՞IɺM"eC,U>||c5f0iLQ;cI#u{BnO оfCMN$ Au)w',"w!5z.}qI|rѹ48q:YY-YpSMOKV(NIckko_}y]ZҹHt[Zֺi8oHf"| 1 "ˍ:m@@U ՇJ.WSWXNg &J8':t-?qZiV =mX eY/+r,+A [ &c{)O#wV _17E1> u7z7壕;]6A:utL >1{.i^[Y; UFz@z!?ց\Sn7Zt99R:D93┯fߵu-GMvz|ڎ97=X]dWpvx}n/Pj=,Մ ,w\`G:0e :2Ό_:}`M6XMūF/Ys_.3!}죍;?(2ϠgF-//Nќ֚g}mTu'S#3B6v. 
GLQF_;]f!WPSA p"iv0 1#LV]=g`IZhϬ}cJNu_Mr-}dft>ΩWIG]xN+ jעY&_{/z{gQ TF6-=#u\ʎ[&Z:: yk˓l?8׏nͩMK= k6a}ujw}'I ϞQ urw^>_6QÄ˺(렗P;(Ѭj~@3[\(I9^H(Ydܬ9[MyaFnٞ?)/k|AVJMY7g5hSXU7﵊e)e3Y-;BJrڴmwELz쯽l 0,ƠkDl2cಠY|Ht 336xp-nA ^Nme~҉5P/2Xǁd8'+|@Fжӈ)YN7W脴ѣh~|U@w]q oQqQŻ^Eɲ!"C!suf1J r81d[fv1lœp8 xsCWq>Ef>rF_w`sL|΂ m(j^3:{PAPhs}o#`Hx\ovbPd)k؂p:S~; {Nj̰(M[/R<^9rI3#=*.]2J1_x[;RmRm΁quːܞi9>!IIrTjY(eDdF4qPh 0;\7l^ .47n d4z6ՆǷ.WF:o%V$?IZ2%Q6/KO |݃ >) aD~CO/7..i&Lcz5ߘpcK 6z{~FˎL MVn֬ђิqq="Q: FW-8@bQl^'Fk%Q烩s1w(, QE;=7"Gy ^EK\3:RWx̊R2׬Vyk]>VӗDumy|u^5ոJuHTa9l˫Yz*)-2CwQ&)wl~~}ˉeAc"c#JD˜dQ&r\4ã-7xT^e㝊XioZU^'Õt1Hͬ\ ٯ wLQ4l\y}WbCߗBX߷PMk2֚l$MG ] > kF۳FnϽз0Ni]wAUuAcę vb{;2 [*e;_xֹy=hy9m ç珜1$:a;筇Fwh%8GvaVm^VK3qnm`_ox̷ԧZWa3-utuw [ kɔܿkJmi۟1MTʍၷ3z23z' j ۤ 4`0Y%@}aԺ] jW2vbre2*-{{;oTv`Eg {I2ݹ;WD/#{ŘOVo`Xq˖d e`aLWbYu~Ė>4&ΰ` LvGg[V[=%[OYkpŸ 5G-;(}؂zd~YMG>= r"#΢IX#""9!!4<`xB1fWQ7 z?}~z/8O#+Xĉcs\Ǘe澱{moy[f෵M}ZSJn0hǧ!$oPdhHpoiUēݜ<&%K~) (RSynv{v>:8x՝̖EW 8|Ԧ<]?z*dW(5>FXJeYS'?=}?K7K]jTPsr# GjIVJKu>VJ5>RyXT;Tc?bNI8. 2g"GM>/{!H#ACCBƒ<kreWѯjnTʆ]zÙsĆtx= ?~O蔪Ļ;VT}DzdƉa!}#S+ؽ3y~]cIsGΤ!LjDu]NvϩJ6#۠ ^$Ll#4Qo޺jQG%&ߒnTn[ L7}O?$eoaej!CvE41h|}&DM.d_,e tz-2G̗O %HO+;XV>\=k[ȡNB*Fyߓnyrݜ7( BI佌gݲ$`f3F&vnAIp5nn %ﮘ|oMwᇢGҧU1yK# xq&_ĵI^tYx˗F~ud|SV1:VBw0nnlC/hiEekxA+^> endobj xref 0 44 0000000000 65535 f 0000046987 00000 n 0000004739 00000 n 0000009382 00000 n 0000000022 00000 n 0000004719 00000 n 0000004843 00000 n 0000008401 00000 n 0000005038 00000 n 0000005483 00000 n 0000005502 00000 n 0000017664 00000 n 0000000000 00000 n 0000035966 00000 n 0000000000 00000 n 0000009529 00000 n 0000005571 00000 n 0000005616 00000 n 0000009345 00000 n 0000005665 00000 n 0000008380 00000 n 0000008437 00000 n 0000009325 00000 n 0000009465 00000 n 0000010223 00000 n 0000009778 00000 n 0000010203 00000 n 0000010461 00000 n 0000017643 00000 n 0000018290 00000 n 0000018525 00000 n 0000035944 00000 n 0000036835 00000 n 0000036282 00000 n 0000036815 00000 n 0000037068 00000 n 0000046373 00000 n 0000046394 00000 n 0000046750 00000 n 0000046800 00000 n 
0000046830 00000 n 0000046855 00000 n 0000046897 00000 n 0000046939 00000 n trailer << /Size 44 /Root 23 0 R /Info 1 0 R /ID [ <8cdd1ee6d08b3bde18010a765f7a1803> <8cdd1ee6d08b3bde18010a765f7a1803> ] >> startxref 47146 %%EOF django-haystack-2.8.0/test_haystack/solr_tests/server/000077500000000000000000000000001325051407000231425ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/solr_tests/server/.gitignore000066400000000000000000000000131325051407000251240ustar00rootroot00000000000000solr-*.tgz django-haystack-2.8.0/test_haystack/solr_tests/server/confdir/000077500000000000000000000000001325051407000245665ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/solr_tests/server/confdir/schema.xml000066400000000000000000001654331325051407000265640ustar00rootroot00000000000000 id id django-haystack-2.8.0/test_haystack/solr_tests/server/confdir/solrconfig.xml000066400000000000000000001600241325051407000274600ustar00rootroot00000000000000 6.5.0 ${solr.data.dir:} ${solr.lock.type:native} ${solr.ulog.dir:} ${solr.ulog.numVersionBuckets:65536} ${solr.autoCommit.maxTime:15000} false ${solr.autoSoftCommit.maxTime:-1} 1024 true 20 200 false explicit 10 default on true 10 5 5 true true 10 5 spellcheck explicit json true explicit text add-unknown-fields-to-the-schema solrpingquery all true ignored_ text text_en default text solr.DirectSolrSpellChecker internal 0.5 2 1 5 4 0.01 default on true 10 5 5 true true 10 5 spellcheck true tvComponent true false terms string elevate.xml explicit elevator 100 70 0.5 [-\w ,/\n\"']{20,200} ]]> ]]> ,, ,, ,, ,, ,]]> ]]> 10 .,!? WORD en US [^\w-\.] 
_ yyyy-MM-dd'T'HH:mm:ss.SSSZ yyyy-MM-dd'T'HH:mm:ss,SSSZ yyyy-MM-dd'T'HH:mm:ss.SSS yyyy-MM-dd'T'HH:mm:ss,SSS yyyy-MM-dd'T'HH:mm:ssZ yyyy-MM-dd'T'HH:mm:ss yyyy-MM-dd'T'HH:mmZ yyyy-MM-dd'T'HH:mm yyyy-MM-dd HH:mm:ss.SSSZ yyyy-MM-dd HH:mm:ss,SSSZ yyyy-MM-dd HH:mm:ss.SSS yyyy-MM-dd HH:mm:ss,SSS yyyy-MM-dd HH:mm:ssZ yyyy-MM-dd HH:mm:ss yyyy-MM-dd HH:mmZ yyyy-MM-dd HH:mm yyyy-MM-dd text/plain; charset=UTF-8 ${velocity.template.base.dir:} ${velocity.solr.resource.loader.enabled:true} ${velocity.params.resource.loader.enabled:false} 5 django-haystack-2.8.0/test_haystack/solr_tests/server/get-solr-download-url.py000077500000000000000000000034211325051407000276600ustar00rootroot00000000000000#!/usr/bin/env python # encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from itertools import chain import sys import requests # Try to import urljoin from the Python 3 reorganized stdlib first: try: from urllib.parse import urljoin except ImportError: from urlparse import urljoin if len(sys.argv) != 2: print('Usage: %s SOLR_VERSION' % sys.argv[0], file=sys.stderr) sys.exit(1) solr_version = sys.argv[1] tarball = 'solr-{0}.tgz'.format(solr_version) dist_path = 'lucene/solr/{0}/{1}'.format(solr_version, tarball) download_url = urljoin('https://archive.apache.org/dist/', dist_path) mirror_response = requests.get("https://www.apache.org/dyn/mirrors/mirrors.cgi/%s?asjson=1" % dist_path) if not mirror_response.ok: print('Apache mirror request returned HTTP %d' % mirror_response.status_code, file=sys.stderr) sys.exit(1) mirror_data = mirror_response.json() # Since the Apache mirrors are often unreliable and releases may disappear without notice we'll # try the preferred mirror, all of the alternates and backups, and fall back to the main Apache # archive server: for base_url in chain((mirror_data['preferred'], ), mirror_data['http'], mirror_data['backup'], ('https://archive.apache.org/dist/', )): test_url = urljoin(base_url, 
mirror_data['path_info']) # The Apache mirror script's response format has recently changed to exclude the actual file paths: if not test_url.endswith(tarball): test_url = urljoin(test_url, dist_path) if requests.head(test_url, allow_redirects=True).status_code == 200: download_url = test_url break else: print('None of the Apache mirrors have %s' % dist_path, file=sys.stderr) sys.exit(1) print(download_url) django-haystack-2.8.0/test_haystack/solr_tests/server/start-solr-test-server.sh000077500000000000000000000036511325051407000301010ustar00rootroot00000000000000#!/bin/bash set -e SOLR_VERSION=6.5.0 SOLR_DIR=solr SOLR_PORT=9001 cd $(dirname $0) export TEST_ROOT=$(pwd) export SOLR_ARCHIVE="${SOLR_VERSION}.tgz" if [ -d "${HOME}/download-cache/" ]; then export SOLR_ARCHIVE="${HOME}/download-cache/${SOLR_ARCHIVE}" fi if [ -f ${SOLR_ARCHIVE} ]; then # If the tarball doesn't extract cleanly, remove it so it'll download again: tar -tf ${SOLR_ARCHIVE} > /dev/null || rm ${SOLR_ARCHIVE} fi if [ ! 
-f ${SOLR_ARCHIVE} ]; then SOLR_DOWNLOAD_URL=$(python get-solr-download-url.py $SOLR_VERSION) curl -Lo $SOLR_ARCHIVE ${SOLR_DOWNLOAD_URL} || (echo "Unable to download ${SOLR_DOWNLOAD_URL}"; exit 2) fi echo "Extracting Solr ${SOLR_ARCHIVE} to `pwd`/${SOLR_DIR}" rm -rf ${SOLR_DIR} mkdir ${SOLR_DIR} FULL_SOLR_DIR=$(readlink -f ./${SOLR_DIR}) tar -C ${SOLR_DIR} -xf ${SOLR_ARCHIVE} --strip-components=1 export SOLR_LOGS_DIR="${FULL_SOLR_DIR}/logs" install -d ${SOLR_LOGS_DIR} echo "Changing into ${FULL_SOLR_DIR} " cd ${FULL_SOLR_DIR} echo "Creating Solr Core" ./bin/solr start -p ${SOLR_PORT} ./bin/solr create -c collection1 -p ${SOLR_PORT} -n basic_config ./bin/solr create -c mgmnt -p ${SOLR_PORT} echo "Solr system information:" curl --fail --silent 'http://localhost:9001/solr/admin/info/system?wt=json&indent=on' | python -m json.tool ./bin/solr stop -p ${SOLR_PORT} CONF_DIR=${TEST_ROOT}/confdir CORE_DIR=${FULL_SOLR_DIR}/server/solr/collection1 mv ${CORE_DIR}/conf/managed-schema ${CORE_DIR}/conf/managed-schema.old cp ${CONF_DIR}/* ${CORE_DIR}/conf/ echo 'Starting server' cd server # We use exec to allow process monitors to correctly kill the # actual Java process rather than this launcher script: export CMD="java -Djetty.port=${SOLR_PORT} -Djava.awt.headless=true -Dapple.awt.UIElement=true -jar start.jar --module=http -Dsolr.install.dir=${FULL_SOLR_DIR} -Dsolr.log.dir=${SOLR_LOGS_DIR}" if [ -z "${BACKGROUND_SOLR}" ]; then exec $CMD else exec $CMD >/dev/null & fi django-haystack-2.8.0/test_haystack/solr_tests/server/wait-for-solr000077500000000000000000000021321325051407000255730ustar00rootroot00000000000000#!/usr/bin/env python # encoding: utf-8 """Simple throttle to wait for Solr to start on busy test servers""" from __future__ import absolute_import, print_function, unicode_literals import sys import time import requests max_retries = 100 retry_count = 0 retry_delay = 15 status_url = 'http://localhost:9001/solr/collection1/admin/ping' while retry_count < max_retries: 
status_code = 0 try: r = requests.get(status_url) status_code = r.status_code if status_code == 200: sys.exit(0) except Exception as exc: print('Unhandled exception requesting %s: %s' % (status_url, exc), file=sys.stderr) retry_count += 1 print('Waiting {0} seconds for Solr to start (retry #{1}, status {2})'.format(retry_delay, retry_count, status_code), file=sys.stderr) time.sleep(retry_delay) print("Solr took too long to start (#%d retries)" % retry_count, file=sys.stderr) sys.exit(1) django-haystack-2.8.0/test_haystack/solr_tests/test_admin.py000066400000000000000000000055101325051407000243360ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf import settings from django.contrib.auth.models import User from django.test import TestCase from django.test.utils import override_settings from django.urls import reverse from haystack import connections, reset_search_queries from haystack.utils.loading import UnifiedIndex from ..core.models import MockModel from .test_solr_backend import clear_solr_index, SolrMockModelSearchIndex @override_settings(DEBUG=True) class SearchModelAdminTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(SearchModelAdminTestCase, self).setUp() # With the models setup, you get the proper bits. # Stow. self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() smmsi = SolrMockModelSearchIndex() self.ui.build(indexes=[smmsi]) connections['solr']._index = self.ui # Wipe it clean. clear_solr_index() # Force indexing of the content. smmsi.update(using='solr') superuser = User.objects.create_superuser( username='superuser', password='password', email='super@user.com', ) def tearDown(self): # Restore. 
connections['solr']._index = self.old_ui super(SearchModelAdminTestCase, self).tearDown() def test_usage(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) self.assertEqual(self.client.login(username='superuser', password='password'), True) # First, non-search behavior. resp = self.client.get('/admin/core/mockmodel/') self.assertEqual(resp.status_code, 200) self.assertEqual(len(connections['solr'].queries), 0) self.assertEqual(resp.context['cl'].full_result_count, 23) # Then search behavior. resp = self.client.get('/admin/core/mockmodel/', data={'q': 'Haystack'}) self.assertEqual(resp.status_code, 200) self.assertEqual(len(connections['solr'].queries), 3) self.assertEqual(resp.context['cl'].full_result_count, 23) # Ensure they aren't search results. self.assertEqual(isinstance(resp.context['cl'].result_list[0], MockModel), True) result_pks = [i.pk for i in resp.context['cl'].result_list] self.assertIn(5, result_pks) # Make sure only changelist is affected. 
resp = self.client.get(reverse('admin:core_mockmodel_change', args=(1, ))) self.assertEqual(resp.status_code, 200) self.assertEqual(resp.context['original'].id, 1) self.assertTemplateUsed(resp, 'admin/change_form.html') # The Solr query count should be unchanged: self.assertEqual(len(connections['solr'].queries), 3) django-haystack-2.8.0/test_haystack/solr_tests/test_inputs.py000066400000000000000000000073651325051407000246020ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from haystack import connections, inputs class SolrInputTestCase(TestCase): def setUp(self): super(SolrInputTestCase, self).setUp() self.query_obj = connections['solr'].get_query() def test_raw_init(self): raw = inputs.Raw('hello OR there, :you') self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {}) self.assertEqual(raw.post_process, False) raw = inputs.Raw('hello OR there, :you', test='really') self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {'test': 'really'}) self.assertEqual(raw.post_process, False) def test_raw_prepare(self): raw = inputs.Raw('hello OR there, :you') self.assertEqual(raw.prepare(self.query_obj), 'hello OR there, :you') def test_clean_init(self): clean = inputs.Clean('hello OR there, :you') self.assertEqual(clean.query_string, 'hello OR there, :you') self.assertEqual(clean.post_process, True) def test_clean_prepare(self): clean = inputs.Clean('hello OR there, :you') self.assertEqual(clean.prepare(self.query_obj), 'hello or there, \\:you') def test_exact_init(self): exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.query_string, 'hello OR there, :you') self.assertEqual(exact.post_process, True) def test_exact_prepare(self): exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"') exact = inputs.Exact('hello 
OR there, :you', clean=True) self.assertEqual(exact.prepare(self.query_obj), u'"hello or there, \\:you"') def test_not_init(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.query_string, 'hello OR there, :you') self.assertEqual(not_it.post_process, True) def test_not_prepare(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.prepare(self.query_obj), u'NOT (hello or there, \\:you)') def test_autoquery_init(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.query_string, 'panic -don\'t "froody dude"') self.assertEqual(autoquery.post_process, False) def test_autoquery_prepare(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.prepare(self.query_obj), u'panic NOT don\'t "froody dude"') def test_altparser_init(self): altparser = inputs.AltParser('dismax') self.assertEqual(altparser.parser_name, 'dismax') self.assertEqual(altparser.query_string, '') self.assertEqual(altparser.kwargs, {}) self.assertEqual(altparser.post_process, False) altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) self.assertEqual(altparser.parser_name, 'dismax') self.assertEqual(altparser.query_string, 'douglas adams') self.assertEqual(altparser.kwargs, {'mm': 1, 'qf': 'author'}) self.assertEqual(altparser.post_process, False) def test_altparser_prepare(self): altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) self.assertEqual(altparser.prepare(self.query_obj), u"""_query_:"{!dismax mm=1 qf=author}douglas adams\"""") altparser = inputs.AltParser('dismax', 'Don\'t panic', qf='text author', mm=1) self.assertEqual(altparser.prepare(self.query_obj), u"""_query_:"{!dismax mm=1 qf='text author'}Don't panic\"""") django-haystack-2.8.0/test_haystack/solr_tests/test_solr_backend.py000066400000000000000000001630631325051407000257040ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, 
division, print_function, unicode_literals import datetime import logging as std_logging import os import unittest from decimal import Decimal from pkg_resources import parse_version import pysolr from django.conf import settings from django.test import TestCase from django.test.utils import override_settings from mock import patch from haystack import connections, indexes, reset_search_queries from haystack.exceptions import SkipDocument from haystack.inputs import AltParser, AutoQuery, Raw from haystack.models import SearchResult from haystack.query import SQ, RelatedSearchQuerySet, SearchQuerySet from haystack.utils.geo import Point from haystack.utils.loading import UnifiedIndex from ..core.models import AFourthMockModel, AnotherMockModel, ASixthMockModel, MockModel from ..mocks import MockSearchResult test_pickling = True try: import cPickle as pickle except ImportError: try: import pickle except ImportError: test_pickling = False def clear_solr_index(): # Wipe it clean. print('Clearing out Solr...') raw_solr = pysolr.Solr(settings.HAYSTACK_CONNECTIONS['solr']['URL']) raw_solr.delete(q='*:*') class SolrMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) name = indexes.CharField(model_attr='author', faceted=True) pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class SolrMockSearchIndexWithSkipDocument(SolrMockSearchIndex): def prepare_text(self, obj): if obj.author == 'daniel3': raise SkipDocument return u"Indexed!\n%s" % obj.id class SolrMockOverriddenFieldNameSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) name = indexes.CharField(model_attr='author', faceted=True, index_fieldname='name_s') pub_date = indexes.DateField(model_attr='pub_date', index_fieldname='pub_date_dt') today = indexes.IntegerField(index_fieldname='today_i') def prepare_today(self, obj): return 
datetime.datetime.now().day def get_model(self): return MockModel class SolrMaintainTypeMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) month = indexes.CharField(indexed=False) pub_date = indexes.DateTimeField(model_attr='pub_date') def prepare_month(self, obj): return "%02d" % obj.pub_date.month def get_model(self): return MockModel class SolrMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='foo', document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class SolrAnotherMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return AnotherMockModel def prepare_text(self, obj): return u"You might be searching for the user %s" % obj.author class SolrBoostMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField( document=True, use_template=True, template_name='search/indexes/core/mockmodel_template.txt' ) author = indexes.CharField(model_attr='author', weight=2.0) editor = indexes.CharField(model_attr='editor') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return AFourthMockModel class SolrRoundTripSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, default='') name = indexes.CharField() is_active = indexes.BooleanField() post_count = indexes.IntegerField() average_rating = indexes.FloatField() price = indexes.DecimalField() pub_date = indexes.DateField() created = indexes.DateTimeField() tags = indexes.MultiValueField() sites = indexes.MultiValueField() def get_model(self): return MockModel def prepare(self, obj): prepped = super(SolrRoundTripSearchIndex, self).prepare(obj) prepped.update({ 
'text': 'This is some example text.', 'name': 'Mister Pants', 'is_active': True, 'post_count': 25, 'average_rating': 3.6, 'price': Decimal('24.99'), 'pub_date': datetime.date(2009, 11, 21), 'created': datetime.datetime(2009, 11, 21, 21, 31, 00), 'tags': ['staff', 'outdoor', 'activist', 'scientist'], 'sites': [3, 5, 1], }) return prepped class SolrComplexFacetsMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, default='') name = indexes.CharField(faceted=True) is_active = indexes.BooleanField(faceted=True) post_count = indexes.IntegerField() post_count_i = indexes.FacetIntegerField(facet_for='post_count') average_rating = indexes.FloatField(faceted=True) pub_date = indexes.DateField(faceted=True) created = indexes.DateTimeField(faceted=True) sites = indexes.MultiValueField(faceted=True) def get_model(self): return MockModel class SolrAutocompleteMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='foo', document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') text_auto = indexes.EdgeNgramField(model_attr='foo') name_auto = indexes.EdgeNgramField(model_attr='author') def get_model(self): return MockModel class SolrSpatialSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='name', document=True) location = indexes.LocationField() def prepare_location(self, obj): return "%s,%s" % (obj.lat, obj.lon) def get_model(self): return ASixthMockModel class SolrQuotingMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) def get_model(self): return MockModel def prepare_text(self, obj): return u"""Don't panic but %s has been iñtërnâtiônàlizéð""" % obj.author class SolrSearchBackendTestCase(TestCase): def setUp(self): super(SolrSearchBackendTestCase, self).setUp() # Wipe it clean. 
self.raw_solr = pysolr.Solr(settings.HAYSTACK_CONNECTIONS['solr']['URL']) clear_solr_index() # Stow. self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SolrMockSearchIndex() self.smmidni = SolrMockSearchIndexWithSkipDocument() self.smtmmi = SolrMaintainTypeMockSearchIndex() self.smofnmi = SolrMockOverriddenFieldNameSearchIndex() self.ui.build(indexes=[self.smmi]) connections['solr']._index = self.ui self.sb = connections['solr'].get_backend() self.sq = connections['solr'].get_query() self.sample_objs = [] for i in range(1, 4): mock = MockModel() mock.id = i mock.author = 'daniel%s' % i mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) self.sample_objs.append(mock) def tearDown(self): connections['solr']._index = self.old_ui super(SolrSearchBackendTestCase, self).tearDown() def test_non_silent(self): bad_sb = connections['solr'].backend('bad', URL='http://omg.wtf.bbq:1000/solr', SILENTLY_FAIL=False, TIMEOUT=1) try: bad_sb.update(self.smmi, self.sample_objs) self.fail() except: pass try: bad_sb.remove('core.mockmodel.1') self.fail() except: pass try: bad_sb.clear() self.fail() except: pass try: bad_sb.search('foo') self.fail() except: pass def test_update(self): self.sb.update(self.smmi, self.sample_objs) results = self.raw_solr.search('*:*') for result in results: del result['_version_'] # Check what Solr thinks is there. 
self.assertEqual(results.hits, 3) self.assertEqual(results.docs, [ { 'django_id': '1', 'django_ct': 'core.mockmodel', 'name': 'daniel1', 'name_exact': 'daniel1', 'text': 'Indexed!\n1', 'pub_date': '2009-02-24T00:00:00Z', 'id': 'core.mockmodel.1' }, { 'django_id': '2', 'django_ct': 'core.mockmodel', 'name': 'daniel2', 'name_exact': 'daniel2', 'text': 'Indexed!\n2', 'pub_date': '2009-02-23T00:00:00Z', 'id': 'core.mockmodel.2' }, { 'django_id': '3', 'django_ct': 'core.mockmodel', 'name': 'daniel3', 'name_exact': 'daniel3', 'text': 'Indexed!\n3', 'pub_date': '2009-02-22T00:00:00Z', 'id': 'core.mockmodel.3' } ]) def test_update_with_SkipDocument_raised(self): self.sb.update(self.smmidni, self.sample_objs) res = self.raw_solr.search('*:*') # Check what Solr thinks is there. self.assertEqual(res.hits, 2) self.assertListEqual( sorted([x['id'] for x in res.docs]), ['core.mockmodel.1', 'core.mockmodel.2'] ) def test_remove(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_solr.search('*:*').hits, 3) self.sb.remove(self.sample_objs[0]) results = self.raw_solr.search('*:*') for result in results: del result['_version_'] self.assertEqual(results.hits, 2) self.assertEqual(results.docs, [ { 'django_id': '2', 'django_ct': 'core.mockmodel', 'name': 'daniel2', 'name_exact': 'daniel2', 'text': 'Indexed!\n2', 'pub_date': '2009-02-23T00:00:00Z', 'id': 'core.mockmodel.2' }, { 'django_id': '3', 'django_ct': 'core.mockmodel', 'name': 'daniel3', 'name_exact': 'daniel3', 'text': 'Indexed!\n3', 'pub_date': '2009-02-22T00:00:00Z', 'id': 'core.mockmodel.3' } ]) def test_clear(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_solr.search('*:*').hits, 3) self.sb.clear() self.assertEqual(self.raw_solr.search('*:*').hits, 0) self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_solr.search('*:*').hits, 3) self.sb.clear([AnotherMockModel]) self.assertEqual(self.raw_solr.search('*:*').hits, 3) self.sb.clear([MockModel]) 
self.assertEqual(self.raw_solr.search('*:*').hits, 0) self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_solr.search('*:*').hits, 3) self.sb.clear([AnotherMockModel, MockModel]) self.assertEqual(self.raw_solr.search('*:*').hits, 0) def test_alternate_index_fieldname(self): self.ui.build(indexes=[self.smofnmi]) connections['solr']._index = self.ui self.sb.update(self.smofnmi, self.sample_objs) search = self.sb.search('*') self.assertEqual(search['hits'], 3) results = search['results'] today = datetime.datetime.now().day self.assertEqual([result.today for result in results], [today, today, today]) self.assertEqual([result.name for result in results], ['daniel1', 'daniel2', 'daniel3']) self.assertEqual([result.pub_date for result in results], [datetime.date(2009, 2, 25) - datetime.timedelta(days=1), datetime.date(2009, 2, 25) - datetime.timedelta(days=2), datetime.date(2009, 2, 25) - datetime.timedelta(days=3)]) # revert it back self.ui.build(indexes=[self.smmi]) connections['solr']._index = self.ui def test_search(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_solr.search('*:*').hits, 3) self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*')['hits'], 3) self.assertEqual([result.pk for result in self.sb.search('*:*')['results']], ['1', '2', '3']) self.assertEqual(self.sb.search('', highlight=True), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('Index', highlight=True)['hits'], 3) self.assertEqual([result.highlighted['text'][0] for result in self.sb.search('Index', highlight=True)['results']], ['Indexed!\n1', 'Indexed!\n2', 'Indexed!\n3']) # shortened highlighting options highlight_dict = {'simple.pre': '', 'simple.post': ''} self.assertEqual(self.sb.search('', highlight=highlight_dict), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('Index', highlight=highlight_dict)['hits'], 3) self.assertEqual([result.highlighted['text'][0] for result in 
self.sb.search('Index', highlight=highlight_dict)['results']], ['Indexed!\n1', 'Indexed!\n2', 'Indexed!\n3']) # full-form highlighting options highlight_dict = {'hl.simple.pre': '', 'hl.simple.post': ''} self.assertEqual([result.highlighted['text'][0] for result in self.sb.search('Index', highlight=highlight_dict)['results']], ['Indexed!\n1', 'Indexed!\n2', 'Indexed!\n3']) self.assertEqual(self.sb.search('Indx')['hits'], 0) self.assertEqual(self.sb.search('indax')['spelling_suggestion'], 'index') self.assertEqual(self.sb.search('Indx', spelling_query='indexy')['spelling_suggestion'], 'index') self.assertEqual(self.sb.search('', facets={'name': {}}), {'hits': 0, 'results': []}) results = self.sb.search('Index', facets={'name': {}}) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['fields']['name'], [('daniel1', 1), ('daniel2', 1), ('daniel3', 1)]) self.assertEqual(self.sb.search('', date_facets={'pub_date': {'start_date': datetime.date(2008, 2, 26), 'end_date': datetime.date(2008, 3, 26), 'gap_by': 'month', 'gap_amount': 1}}), {'hits': 0, 'results': []}) results = self.sb.search('Index', date_facets={'pub_date': {'start_date': datetime.date(2008, 2, 26), 'end_date': datetime.date(2008, 3, 26), 'gap_by': 'month', 'gap_amount': 1}}) self.assertEqual(results['hits'], 3) # DRL_TODO: Correct output but no counts. Another case of needing better test data? 
# self.assertEqual(results['facets']['dates']['pub_date'], {'end': '2008-02-26T00:00:00Z', 'gap': '/MONTH'}) self.assertEqual(self.sb.search('', query_facets=[('name', '[* TO e]')]), {'hits': 0, 'results': []}) results = self.sb.search('Index', query_facets=[('name', '[* TO e]')]) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['queries'], {'name:[* TO e]': 3}) self.assertEqual(self.sb.search('', stats={}), {'hits': 0, 'results': []}) results = self.sb.search('*:*', stats={'name': ['name']}) self.assertEqual(results['hits'], 3) self.assertEqual(results['stats']['name']['count'], 3) self.assertEqual(self.sb.search('', narrow_queries=set(['name:daniel1'])), {'hits': 0, 'results': []}) results = self.sb.search('Index', narrow_queries=set(['name:daniel1'])) self.assertEqual(results['hits'], 1) # Ensure that swapping the ``result_class`` works. results = self.sb.search(u'index', result_class=MockSearchResult) self.assertIsInstance(self.sb.search(u'index', result_class=MockSearchResult)['results'][0], MockSearchResult) # Check the use of ``limit_to_registered_models``. self.assertEqual(self.sb.search('', limit_to_registered_models=False), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*', limit_to_registered_models=False)['hits'], 3) self.assertEqual([result.pk for result in self.sb.search('*:*', limit_to_registered_models=False)['results']], ['1', '2', '3']) # Stow. old_limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False self.assertEqual(self.sb.search(''), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search('*:*')['hits'], 3) self.assertEqual([result.pk for result in self.sb.search('*:*')['results']], ['1', '2', '3']) # Restore. 
settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = old_limit_to_registered_models def test_spelling(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.sb.search('Indx')['hits'], 0) self.assertEqual(self.sb.search('indax')['spelling_suggestion'], 'index') self.assertEqual(self.sb.search('Indx', spelling_query='indexy')['spelling_suggestion'], 'index') def test_spatial_search_parameters(self): p1 = Point(1.23, 4.56) kwargs = self.sb.build_search_kwargs('*:*', distance_point={'field': 'location', 'point': p1}, sort_by='distance asc') # Points in Solr are lat, lon pairs but Django GIS Point() uses lon, lat so we'll check for the flip # See https://django-haystack.readthedocs.io/en/latest/spatial.html#points self.assertEqual(kwargs.get('pt'), '4.56,1.23') self.assertEqual(kwargs.get('sfield'), 'location') self.assertEqual(kwargs.get('sort'), 'geodist() asc') def test_altparser_query(self): self.sb.update(self.smmi, self.sample_objs) results = self.sb.search(AltParser('dismax', "daniel1", qf='name', mm=1).prepare(self.sq)) self.assertEqual(results['hits'], 1) # This should produce exactly the same result since all we have are mockmodel instances but we simply # want to confirm that using the AltParser doesn't break other options: results = self.sb.search(AltParser('dismax', 'daniel1', qf='name', mm=1).prepare(self.sq), narrow_queries=set(('django_ct:core.mockmodel', ))) self.assertEqual(results['hits'], 1) results = self.sb.search(AltParser('dismax', '+indexed +daniel1', qf='text name', mm=1).prepare(self.sq)) self.assertEqual(results['hits'], 1) self.sq.add_filter(SQ(name=AltParser('dismax', 'daniel1', qf='name', mm=1))) self.sq.add_filter(SQ(text='indexed')) new_q = self.sq._clone() new_q._reset() new_q.add_filter(SQ(name='daniel1')) new_q.add_filter(SQ(text=AltParser('dismax', 'indexed', qf='text'))) results = new_q.get_results() self.assertEqual(len(results), 1) self.assertEqual(results[0].id, 'core.mockmodel.1') def test_raw_query(self): 
self.sb.update(self.smmi, self.sample_objs) # Ensure that the raw bits have proper parenthesis. new_q = self.sq._clone() new_q._reset() new_q.add_filter(SQ(content=Raw("{!dismax qf='title^2 text' mm=1}my query"))) results = new_q.get_results() self.assertEqual(len(results), 0) def test_altparser_quoting(self): test_objs = [ MockModel(id=1, author="Foo d'Bar", pub_date=datetime.date.today()), MockModel(id=2, author="Baaz Quuz", pub_date=datetime.date.today()), ] self.sb.update(SolrQuotingMockSearchIndex(), test_objs) results = self.sb.search(AltParser('dismax', "+don't +quuz", qf='text').prepare(self.sq)) self.assertEqual(results['hits'], 1) def test_more_like_this(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_solr.search('*:*').hits, 3) # A functional MLT example with enough data to work is below. Rely on # this to ensure the API is correct enough. self.assertEqual(self.sb.more_like_this(self.sample_objs[0])['hits'], 0) self.assertEqual([result.pk for result in self.sb.more_like_this(self.sample_objs[0])['results']], []) def test_build_schema(self): old_ui = connections['solr'].get_unified_index() (content_field_name, fields) = self.sb.build_schema(old_ui.all_searchfields()) self.assertEqual(content_field_name, 'text') self.assertEqual(len(fields), 4) self.assertEqual(sorted(fields, key=lambda x: x['field_name']), [ { 'indexed': 'true', 'type': 'text_en', 'stored': 'true', 'field_name': 'name', 'multi_valued': 'false' }, { 'indexed': 'true', 'field_name': 'name_exact', 'stored': 'true', 'type': 'string', 'multi_valued': 'false' }, { 'indexed': 'true', 'type': 'date', 'stored': 'true', 'field_name': 'pub_date', 'multi_valued': 'false' }, { 'indexed': 'true', 'type': 'text_en', 'stored': 'true', 'field_name': 'text', 'multi_valued': 'false' }, ]) ui = UnifiedIndex() ui.build(indexes=[SolrComplexFacetsMockSearchIndex()]) (content_field_name, fields) = self.sb.build_schema(ui.all_searchfields()) self.assertEqual(content_field_name, 
'text') self.assertEqual(len(fields), 15) fields = sorted(fields, key=lambda field: field['field_name']) self.assertEqual(fields, [ { 'field_name': 'average_rating', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'float' }, { 'field_name': 'average_rating_exact', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'float' }, { 'field_name': 'created', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'date' }, { 'field_name': 'created_exact', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'date' }, { 'field_name': 'is_active', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'boolean' }, { 'field_name': 'is_active_exact', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'boolean' }, { 'field_name': 'name', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'text_en' }, { 'field_name': 'name_exact', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'string' }, { 'field_name': 'post_count', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'long' }, { 'field_name': 'post_count_i', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'long' }, { 'field_name': 'pub_date', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'date' }, { 'field_name': 'pub_date_exact', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'date' }, { 'field_name': 'sites', 'indexed': 'true', 'multi_valued': 'true', 'stored': 'true', 'type': 'text_en' }, { 'field_name': 'sites_exact', 'indexed': 'true', 'multi_valued': 'true', 'stored': 'true', 'type': 'string' }, { 'field_name': 'text', 'indexed': 'true', 'multi_valued': 'false', 'stored': 'true', 'type': 'text_en' } ]) def test_verify_type(self): old_ui = connections['solr'].get_unified_index() ui = UnifiedIndex() smtmmi = SolrMaintainTypeMockSearchIndex() ui.build(indexes=[smtmmi]) 
connections['solr']._index = ui sb = connections['solr'].get_backend() sb.update(smtmmi, self.sample_objs) self.assertEqual(sb.search('*:*')['hits'], 3) self.assertEqual([result.month for result in sb.search('*:*')['results']], [u'02', u'02', u'02']) connections['solr']._index = old_ui class CaptureHandler(std_logging.Handler): logs_seen = [] def emit(self, record): CaptureHandler.logs_seen.append(record) @patch("pysolr.Solr._send_request", side_effect=pysolr.SolrError) @patch("logging.Logger.log") class FailedSolrSearchBackendTestCase(TestCase): def test_all_cases(self, mock_send_request, mock_log): self.sample_objs = [] for i in range(1, 4): mock = MockModel() mock.id = i mock.author = 'daniel%s' % i mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) self.sample_objs.append(mock) # Setup the rest of the bits. ui = UnifiedIndex() smmi = SolrMockSearchIndex() ui.build(indexes=[smmi]) connections['solr']._index = ui sb = connections['solr'].get_backend() # Prior to the addition of the try/except bits, these would all fail miserably. sb.update(smmi, self.sample_objs) self.assertEqual(mock_log.call_count, 1) sb.remove(self.sample_objs[0]) self.assertEqual(mock_log.call_count, 2) sb.search('search') self.assertEqual(mock_log.call_count, 3) sb.more_like_this(self.sample_objs[0]) self.assertEqual(mock_log.call_count, 4) sb.clear([MockModel]) self.assertEqual(mock_log.call_count, 5) sb.clear() self.assertEqual(mock_log.call_count, 6) class LiveSolrSearchQueryTestCase(TestCase): fixtures = ['base_data.json'] def setUp(self): super(LiveSolrSearchQueryTestCase, self).setUp() # Wipe it clean. clear_solr_index() # Stow. self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SolrMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['solr']._index = self.ui self.sb = connections['solr'].get_backend() self.sq = connections['solr'].get_query() # Force indexing of the content. 
self.smmi.update('solr') def tearDown(self): connections['solr']._index = self.old_ui super(LiveSolrSearchQueryTestCase, self).tearDown() def test_get_spelling(self): self.sq.add_filter(SQ(content='Indexy')) # Default collate + spelling path self.assertEqual(self.sq.get_spelling_suggestion(), u'(index)') self.assertEqual(self.sq.get_spelling_suggestion('indexy'), u'(index)') # Just spelling path self.sq.run(spelling_query='Indexy', collate=False) self.assertEqual(self.sq._spelling_suggestion, u'index') def test_log_query(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) with self.settings(DEBUG=False): len(self.sq.get_results()) self.assertEqual(len(connections['solr'].queries), 0) with self.settings(DEBUG=True): # Redefine it to clear out the cached results. self.sq = connections['solr'].get_query() self.sq.add_filter(SQ(name='bar')) len(self.sq.get_results()) self.assertEqual(len(connections['solr'].queries), 1) self.assertEqual(connections['solr'].queries[0]['query_string'], 'name:(bar)') # And again, for good measure. self.sq = connections['solr'].get_query() self.sq.add_filter(SQ(name='bar')) self.sq.add_filter(SQ(text='moof')) len(self.sq.get_results()) self.assertEqual(len(connections['solr'].queries), 2) self.assertEqual(connections['solr'].queries[0]['query_string'], 'name:(bar)') self.assertEqual(connections['solr'].queries[1]['query_string'], u'(name:(bar) AND text:(moof))') @override_settings(DEBUG=True) class LiveSolrSearchQuerySetTestCase(TestCase): """Used to test actual implementation details of the SearchQuerySet.""" fixtures = ['base_data.json', 'bulk_data.json'] @classmethod def setUpClass(cls): super(LiveSolrSearchQuerySetTestCase, cls).setUpClass() cls._index_updated = False @classmethod def tearDownClass(cls): del cls._index_updated super(LiveSolrSearchQuerySetTestCase, cls).tearDownClass() def setUp(self): super(LiveSolrSearchQuerySetTestCase, self).setUp() # Stow. 
self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SolrMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['solr']._index = self.ui self.sqs = SearchQuerySet('solr') self.rsqs = RelatedSearchQuerySet('solr') if not self._index_updated: std_logging.info('Reindexing test data') # Wipe it clean. clear_solr_index() # Force indexing of the content. self.smmi.update('solr') self._index_updated = True def tearDown(self): # Restore. connections['solr']._index = self.old_ui super(LiveSolrSearchQuerySetTestCase, self).tearDown() def test_load_all(self): sqs = self.sqs.load_all() self.assertTrue(len(sqs) > 0) # load_all should not change the results or their ordering: self.assertListEqual([i.id for i in sqs], [i.id for i in self.sqs]) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.maxDiff = None self.assertEqual(sqs[0].object.foo, u"Registering indexes in Haystack is very similar to registering models and ``ModelAdmin`` classes in the `Django admin site`_. If you want to override the default indexing behavior for your model you can specify your own ``SearchIndex`` class. This is useful for ensuring that future-dated or non-live content is not indexed and searchable. 
Our ``Note`` model has a ``pub_date`` field, so let's update our code to include our own ``SearchIndex`` to exclude indexing future-dated notes:") def test_iter(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) sqs = self.sqs.all() results = [int(result.pk) for result in iter(sqs)] self.assertEqual(results, list(range(1, 24))) self.assertEqual(len(connections['solr'].queries), 3) def test_slice(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) results = self.sqs.all() self.assertEqual([int(result.pk) for result in results[1:11]], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) self.assertEqual(len(connections['solr'].queries), 1) reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) results = self.sqs.all() self.assertEqual(int(results[21].pk), 22) self.assertEqual(len(connections['solr'].queries), 1) def test_values_list_slice(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends # The values will come back as strings because Hasytack doesn't assume PKs are integers. 
# We'll prepare this set once since we're going to query the same results in multiple ways: expected_pks = [str(i) for i in [3, 2, 4, 5, 6, 7, 8, 9, 10, 11]] results = self.sqs.all().order_by('pub_date').values('pk') self.assertListEqual([i['pk'] for i in results[1:11]], expected_pks) results = self.sqs.all().order_by('pub_date').values_list('pk') self.assertListEqual([i[0] for i in results[1:11]], expected_pks) results = self.sqs.all().order_by('pub_date').values_list('pk', flat=True) self.assertListEqual(results[1:11], expected_pks) self.assertEqual(len(connections['solr'].queries), 3) def test_count(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) sqs = self.sqs.all() self.assertEqual(sqs.count(), 23) self.assertEqual(sqs.count(), 23) self.assertEqual(len(sqs), 23) self.assertEqual(sqs.count(), 23) # Should only execute one query to count the length of the result set. self.assertEqual(len(connections['solr'].queries), 1) def test_manual_iter(self): results = self.sqs.all() reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) results = [int(result.pk) for result in results._manual_iter()] self.assertEqual(results, list(range(1, 24))) self.assertEqual(len(connections['solr'].queries), 3) def test_fill_cache(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) results = self.sqs.all() self.assertEqual(len(results._result_cache), 0) self.assertEqual(len(connections['solr'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) self.assertEqual(len(connections['solr'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) self.assertEqual(len(connections['solr'].queries), 2) def test_cache_is_full(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) self.assertEqual(self.sqs._cache_is_full(), 
False) results = self.sqs.all() fire_the_iterator_and_fill_cache = list(results) self.assertEqual(23, len(fire_the_iterator_and_fill_cache)) self.assertEqual(results._cache_is_full(), True) self.assertEqual(len(connections['solr'].queries), 4) def test___and__(self): sqs1 = self.sqs.filter(content='foo') sqs2 = self.sqs.filter(content='bar') sqs = sqs1 & sqs2 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) self.assertEqual(sqs.query.build_query(), u'((foo) AND (bar))') # Now for something more complex... sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) sqs4 = self.sqs.filter(content='bar') sqs = sqs3 & sqs4 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 3) self.assertEqual(sqs.query.build_query(), u'(NOT (title:(moof)) AND ((foo) OR (baz)) AND (bar))') def test___or__(self): sqs1 = self.sqs.filter(content='foo') sqs2 = self.sqs.filter(content='bar') sqs = sqs1 | sqs2 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) self.assertEqual(sqs.query.build_query(), u'((foo) OR (bar))') # Now for something more complex... sqs3 = self.sqs.exclude(title='moof').filter(SQ(content='foo') | SQ(content='baz')) sqs4 = self.sqs.filter(content='bar').models(MockModel) sqs = sqs3 | sqs4 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) self.assertEqual(sqs.query.build_query(), u'((NOT (title:(moof)) AND ((foo) OR (baz))) OR (bar))') def test_auto_query(self): # Ensure bits in exact matches get escaped properly as well. # This will break horrifically if escaping isn't working. 
sqs = self.sqs.auto_query('"pants:rule"') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') self.assertEqual(sqs.query.build_query(), u'("pants\\:rule")') self.assertEqual(len(sqs), 0) sqs = self.sqs.auto_query('Canon+PowerShot+ELPH+(Black)') self.assertEqual(sqs.query.build_query(), u'Canon\\+PowerShot\\+ELPH\\+\\(Black\\)') sqs = sqs.filter(tags__in=['cameras', 'electronics']) self.assertEqual(len(sqs), 0) def test_query__in(self): self.assertGreater(len(self.sqs), 0) sqs = self.sqs.filter(django_ct='core.mockmodel', django_id__in=[1, 2]) self.assertEqual(len(sqs), 2) def test_query__in_empty_list(self): """Confirm that an empty list avoids a Solr exception""" self.assertGreater(len(self.sqs), 0) sqs = self.sqs.filter(id__in=[]) self.assertEqual(len(sqs), 0) # Regressions def test_regression_proper_start_offsets(self): sqs = self.sqs.filter(text='index') self.assertNotEqual(sqs.count(), 0) id_counts = {} for item in sqs: if item.id in id_counts: id_counts[item.id] += 1 else: id_counts[item.id] = 1 for key, value in id_counts.items(): if value > 1: self.fail("Result with id '%s' seen more than once in the results." % key) def test_regression_raw_search_breaks_slicing(self): sqs = self.sqs.raw_search('text: index') page_1 = [result.pk for result in sqs[0:10]] page_2 = [result.pk for result in sqs[10:20]] for pk in page_2: if pk in page_1: self.fail("Result with id '%s' seen more than once in the results." % pk) # RelatedSearchQuerySet Tests def test_related_load_all(self): sqs = self.rsqs.load_all() # load_all should not change the results or their ordering: self.assertListEqual([i.id for i in sqs], [i.id for i in self.rsqs]) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertTrue(len(sqs) > 0) self.assertEqual(sqs[0].object.foo, u"Registering indexes in Haystack is very similar to registering models and ``ModelAdmin`` classes in the `Django admin site`_. 
If you want to override the default indexing behavior for your model you can specify your own ``SearchIndex`` class. This is useful for ensuring that future-dated or non-live content is not indexed and searchable. Our ``Note`` model has a ``pub_date`` field, so let's update our code to include our own ``SearchIndex`` to exclude indexing future-dated notes:") def test_related_load_all_queryset(self): sqs = self.rsqs.load_all() # load_all should not change the results or their ordering: self.assertListEqual([i.id for i in sqs], [i.id for i in self.rsqs]) self.assertEqual(len(sqs._load_all_querysets), 0) sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=1)) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs._load_all_querysets), 1) self.assertEqual([obj.object.id for obj in sqs], list(range(2, 24))) sqs = sqs.load_all_queryset(MockModel, MockModel.objects.filter(id__gt=10)) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs._load_all_querysets), 1) self.assertEqual([obj.object.id for obj in sqs], list(range(11, 24))) self.assertEqual([obj.object.id for obj in sqs[10:20]], [21, 22, 23]) def test_related_iter(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) sqs = self.rsqs.all() results = [int(result.pk) for result in iter(sqs)] self.assertEqual(results, list(range(1, 24))) self.assertEqual(len(connections['solr'].queries), 3) def test_related_slice(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) results = self.rsqs.all() self.assertEqual([int(result.pk) for result in results[1:11]], [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]) self.assertEqual(len(connections['solr'].queries), 1) reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) results = self.rsqs.all() self.assertEqual(int(results[21].pk), 22) self.assertEqual(len(connections['solr'].queries), 1) reset_search_queries() 
self.assertEqual(len(connections['solr'].queries), 0) results = self.rsqs.all() self.assertEqual([int(result.pk) for result in results[20:30]], [21, 22, 23]) self.assertEqual(len(connections['solr'].queries), 1) def test_related_manual_iter(self): results = self.rsqs.all() reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) results = [int(result.pk) for result in results._manual_iter()] self.assertEqual(results, list(range(1, 24))) self.assertEqual(len(connections['solr'].queries), 3) def test_related_fill_cache(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) results = self.rsqs.all() self.assertEqual(len(results._result_cache), 0) self.assertEqual(len(connections['solr'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 10) self.assertEqual(len(connections['solr'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 20) self.assertEqual(len(connections['solr'].queries), 2) def test_related_cache_is_full(self): reset_search_queries() self.assertEqual(len(connections['solr'].queries), 0) self.assertEqual(self.rsqs._cache_is_full(), False) results = self.rsqs.all() fire_the_iterator_and_fill_cache = list(results) self.assertEqual(23, len(fire_the_iterator_and_fill_cache)) self.assertEqual(results._cache_is_full(), True) self.assertEqual(len(connections['solr'].queries), 4) def test_quotes_regression(self): sqs = self.sqs.auto_query(u"44°48'40''N 20°28'32''E") # Should not have empty terms. self.assertEqual(sqs.query.build_query(), u"(44\xb048'40''N 20\xb028'32''E)") # Should not cause Solr to 500. 
try: sqs.count() except Exception as exc: self.fail("raised unexpected error: %s" % exc) sqs = self.sqs.auto_query('blazing') self.assertEqual(sqs.query.build_query(), u'(blazing)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('blazing saddles') self.assertEqual(sqs.query.build_query(), u'(blazing saddles)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles') self.assertEqual(sqs.query.build_query(), u'(\\"blazing saddles)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles"') self.assertEqual(sqs.query.build_query(), u'("blazing saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing saddles"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'saddles"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'\'saddles"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \')') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing \'\'saddles"\'"') self.assertEqual(sqs.query.build_query(), u'(mel "blazing \'\'saddles" \'\\")') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles" mel') self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('"blazing saddles" mel brooks') self.assertEqual(sqs.query.build_query(), u'("blazing saddles" mel brooks)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing saddles" brooks') self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" brooks)') self.assertEqual(sqs.count(), 0) sqs = self.sqs.auto_query('mel "blazing saddles" 
"brooks') self.assertEqual(sqs.query.build_query(), u'(mel "blazing saddles" \\"brooks)') self.assertEqual(sqs.count(), 0) def test_query_generation(self): sqs = self.sqs.filter(SQ(content=AutoQuery("hello world")) | SQ(title=AutoQuery("hello world"))) self.assertEqual(sqs.query.build_query(), u"((hello world) OR title:(hello world))") def test_result_class(self): # Assert that we're defaulting to ``SearchResult``. sqs = self.sqs.all() self.assertTrue(isinstance(sqs[0], SearchResult)) # Custom class. sqs = self.sqs.result_class(MockSearchResult).all() self.assertTrue(isinstance(sqs[0], MockSearchResult)) # Reset to solr. sqs = self.sqs.result_class(None).all() self.assertTrue(isinstance(sqs[0], SearchResult)) class LiveSolrMoreLikeThisTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(LiveSolrMoreLikeThisTestCase, self).setUp() # Wipe it clean. clear_solr_index() self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SolrMockModelSearchIndex() self.sammi = SolrAnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) connections['solr']._index = self.ui self.sqs = SearchQuerySet('solr') self.smmi.update('solr') self.sammi.update('solr') def tearDown(self): # Restore. connections['solr']._index = self.old_ui super(LiveSolrMoreLikeThisTestCase, self).tearDown() def test_more_like_this(self): all_mlt = self.sqs.more_like_this(MockModel.objects.get(pk=1)) self.assertEqual(all_mlt.count(), len([result.pk for result in all_mlt]), msg="mlt SearchQuerySet .count() didn't match retrieved result length") # Rather than hard-code assumptions about Solr's return order, we have a few very similar # items which we'll confirm are included in the first 5 results. 
This is still ugly as we're # hard-coding primary keys but it's better than breaking any time a Solr update or data # change causes a score to shift slightly top_results = [int(result.pk) for result in all_mlt[:5]] for i in (14, 6, 10, 4, 5): self.assertIn(i, top_results) filtered_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=3)) self.assertLess(filtered_mlt.count(), all_mlt.count()) top_filtered_results = [int(result.pk) for result in filtered_mlt[:5]] for i in (16, 17, 19, 13, 23): self.assertIn(i, top_filtered_results) mlt_filtered = self.sqs.more_like_this(MockModel.objects.get(pk=3)).filter(name='daniel3') self.assertLess(mlt_filtered.count(), all_mlt.count()) top_mlt_filtered_pks = [int(result.pk) for result in mlt_filtered[:5]] for i in (17, 16, 19, 23, 13): self.assertIn(i, top_mlt_filtered_pks) filtered_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=1)) self.assertLessEqual(filtered_mlt_with_models.count(), all_mlt.count()) top_filtered_with_models = [int(result.pk) for result in filtered_mlt_with_models[:5]] for i in (14, 6, 4, 5, 10): self.assertIn(i, top_filtered_with_models) def test_more_like_this_defer(self): mi = MockModel.objects.defer('foo').get(pk=1) deferred = self.sqs.models(MockModel).more_like_this(mi) top_results = [int(result.pk) for result in deferred[:5]] for i in (14, 6, 4, 5, 10): self.assertIn(i, top_results) def test_more_like_this_custom_result_class(self): """Ensure that swapping the ``result_class`` works""" first_result = self.sqs.result_class(MockSearchResult).more_like_this(MockModel.objects.get(pk=1))[0] self.assertIsInstance(first_result, MockSearchResult) class LiveSolrAutocompleteTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(LiveSolrAutocompleteTestCase, self).setUp() # Wipe it clean. clear_solr_index() # Stow. 
self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SolrAutocompleteMockModelSearchIndex() self.ui.build(indexes=[self.smmi]) connections['solr']._index = self.ui self.sqs = SearchQuerySet('solr') self.smmi.update(using='solr') def tearDown(self): # Restore. connections['solr']._index = self.old_ui super(LiveSolrAutocompleteTestCase, self).tearDown() def test_autocomplete(self): autocomplete = self.sqs.autocomplete(text_auto='mod') self.assertEqual(autocomplete.count(), 5) self.assertSetEqual(set([result.pk for result in autocomplete]), set(['1', '12', '6', '7', '14'])) self.assertTrue('mod' in autocomplete[0].text.lower()) self.assertTrue('mod' in autocomplete[1].text.lower()) self.assertTrue('mod' in autocomplete[2].text.lower()) self.assertTrue('mod' in autocomplete[3].text.lower()) self.assertTrue('mod' in autocomplete[4].text.lower()) self.assertEqual(len([result.pk for result in autocomplete]), 5) # Test multiple words. autocomplete_2 = self.sqs.autocomplete(text_auto='your mod') self.assertEqual(autocomplete_2.count(), 3) self.assertSetEqual(set([result.pk for result in autocomplete_2]), set(['1', '14', '6'])) self.assertTrue('your' in autocomplete_2[0].text.lower()) self.assertTrue('mod' in autocomplete_2[0].text.lower()) self.assertTrue('your' in autocomplete_2[1].text.lower()) self.assertTrue('mod' in autocomplete_2[1].text.lower()) self.assertTrue('your' in autocomplete_2[2].text.lower()) self.assertTrue('mod' in autocomplete_2[2].text.lower()) self.assertEqual(len([result.pk for result in autocomplete_2]), 3) # Test multiple fields. 
autocomplete_3 = self.sqs.autocomplete(text_auto='Django', name_auto='dan') self.assertEqual(autocomplete_3.count(), 4) self.assertSetEqual(set([result.pk for result in autocomplete_3]), set(['12', '1', '14', '22'])) self.assertEqual(len([result.pk for result in autocomplete_3]), 4) class LiveSolrRoundTripTestCase(TestCase): def setUp(self): super(LiveSolrRoundTripTestCase, self).setUp() # Wipe it clean. clear_solr_index() # Stow. self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.srtsi = SolrRoundTripSearchIndex() self.ui.build(indexes=[self.srtsi]) connections['solr']._index = self.ui self.sb = connections['solr'].get_backend() self.sqs = SearchQuerySet('solr') # Fake indexing. mock = MockModel() mock.id = 1 self.sb.update(self.srtsi, [mock]) def tearDown(self): # Restore. connections['solr']._index = self.old_ui super(LiveSolrRoundTripTestCase, self).tearDown() def test_round_trip(self): results = self.sqs.filter(id='core.mockmodel.1') # Sanity check. self.assertEqual(results.count(), 1) # Check the individual fields. result = results[0] self.assertEqual(result.id, 'core.mockmodel.1') self.assertEqual(result.text, 'This is some example text.') self.assertEqual(result.name, 'Mister Pants') self.assertEqual(result.is_active, True) self.assertEqual(result.post_count, 25) self.assertEqual(result.average_rating, 3.6) self.assertEqual(result.price, u'24.99') self.assertEqual(result.pub_date, datetime.date(2009, 11, 21)) self.assertEqual(result.created, datetime.datetime(2009, 11, 21, 21, 31, 00)) self.assertEqual(result.tags, ['staff', 'outdoor', 'activist', 'scientist']) self.assertEqual(result.sites, [3, 5, 1]) @unittest.skipUnless(test_pickling, 'Skipping pickling tests') class LiveSolrPickleTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(LiveSolrPickleTestCase, self).setUp() # Wipe it clean. clear_solr_index() # Stow. 
self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SolrMockModelSearchIndex() self.sammi = SolrAnotherMockModelSearchIndex() self.ui.build(indexes=[self.smmi, self.sammi]) connections['solr']._index = self.ui self.sqs = SearchQuerySet('solr') self.smmi.update('solr') self.sammi.update('solr') def tearDown(self): # Restore. connections['solr']._index = self.old_ui super(LiveSolrPickleTestCase, self).tearDown() def test_pickling(self): results = self.sqs.all() for res in results: # Make sure the cache is full. pass in_a_pickle = pickle.dumps(results) like_a_cuke = pickle.loads(in_a_pickle) self.assertEqual(len(like_a_cuke), len(results)) self.assertEqual(like_a_cuke[0].id, results[0].id) class SolrBoostBackendTestCase(TestCase): def setUp(self): super(SolrBoostBackendTestCase, self).setUp() # Wipe it clean. self.raw_solr = pysolr.Solr(settings.HAYSTACK_CONNECTIONS['solr']['URL']) clear_solr_index() # Stow. self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SolrBoostMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['solr']._index = self.ui self.sb = connections['solr'].get_backend() self.sample_objs = [] for i in range(1, 5): mock = AFourthMockModel() mock.id = i if i % 2: mock.author = 'daniel' mock.editor = 'david' else: mock.author = 'david' mock.editor = 'daniel' mock.pub_date = datetime.date(2009, 2, 25) - datetime.timedelta(days=i) self.sample_objs.append(mock) def tearDown(self): connections['solr']._index = self.old_ui super(SolrBoostBackendTestCase, self).tearDown() def test_boost(self): self.sb.update(self.smmi, self.sample_objs) self.assertEqual(self.raw_solr.search('*:*').hits, 4) results = SearchQuerySet('solr').filter(SQ(author='daniel') | SQ(editor='daniel')) self.assertEqual([result.id for result in results], [ 'core.afourthmockmodel.1', 'core.afourthmockmodel.3', 'core.afourthmockmodel.2', 'core.afourthmockmodel.4' ]) 
@unittest.skipIf(parse_version(pysolr.__version__) < parse_version('3.1.1'), 'content extraction requires pysolr > 3.1.1') class LiveSolrContentExtractionTestCase(TestCase): def setUp(self): super(LiveSolrContentExtractionTestCase, self).setUp() self.sb = connections['solr'].get_backend() def test_content_extraction(self): f = open(os.path.join(os.path.dirname(__file__), "content_extraction", "test.pdf"), "rb") data = self.sb.extract_file_contents(f) self.assertTrue("haystack" in data['contents']) self.assertEqual(data['metadata']['Content-Type'], [u'application/pdf']) self.assertTrue(any(i for i in data['metadata']['Keywords'] if 'SolrCell' in i)) django-haystack-2.8.0/test_haystack/solr_tests/test_solr_management_commands.py000066400000000000000000000301551325051407000303050ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime import os from tempfile import mkdtemp try: from StringIO import StringIO except ImportError: from io import StringIO import pysolr from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.core.management import call_command from django.core.management.base import CommandError from django.test import TestCase from mock import patch from haystack import connections, constants, indexes from haystack.utils.loading import UnifiedIndex from ..core.models import MockModel, MockTag class SolrMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) name = indexes.CharField(model_attr='author', faceted=True) pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel def get_updated_field(self): return 'pub_date' class SolrMockTagSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, model_attr='name') def get_model(self): return MockTag class 
SolrMockSecretKeySearchIndex(indexes.SearchIndex, indexes.Indexable): Th3S3cr3tK3y = indexes.CharField(document=True, model_attr='author') def get_model(self): return MockModel class ManagementCommandTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(ManagementCommandTestCase, self).setUp() self.solr = pysolr.Solr(settings.HAYSTACK_CONNECTIONS['solr']['URL']) # Stow. self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SolrMockSearchIndex() self.ui.build(indexes=[self.smmi]) connections['solr']._index = self.ui def tearDown(self): connections['solr']._index = self.old_ui super(ManagementCommandTestCase, self).tearDown() def verify_indexed_documents(self): """Confirm that the documents in the search index match the database""" res = self.solr.search('*:*', fl=['id'], rows=50) self.assertEqual(res.hits, 23) indexed_doc_ids = set(i['id'] for i in res.docs) expected_doc_ids = set('core.mockmodel.%d' % i for i in MockModel.objects.values_list('pk', flat=True)) self.assertSetEqual(indexed_doc_ids, expected_doc_ids) def test_basic_commands(self): call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', verbosity=0, commit=False) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', verbosity=0) self.verify_indexed_documents() call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('rebuild_index', interactive=False, verbosity=0, commit=False) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('rebuild_index', interactive=False, verbosity=0, commit=True) self.verify_indexed_documents() call_command('clear_index', interactive=False, verbosity=0, commit=False) self.verify_indexed_documents() def test_remove(self): call_command('clear_index', interactive=False, verbosity=0) 
self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', verbosity=0) self.verify_indexed_documents() # Remove several instances, two of which will fit in the same block: MockModel.objects.get(pk=1).delete() MockModel.objects.get(pk=2).delete() MockModel.objects.get(pk=8).delete() self.assertEqual(self.solr.search('*:*').hits, 23) # Plain ``update_index`` doesn't fix it. call_command('update_index', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 23) # Remove without commit also doesn't affect queries: call_command('update_index', remove=True, verbosity=0, batchsize=2, commit=False) self.assertEqual(self.solr.search('*:*').hits, 23) # … but remove with commit does: call_command('update_index', remove=True, verbosity=0, batchsize=2) self.assertEqual(self.solr.search('*:*').hits, 20) def test_age(self): call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) start = datetime.datetime.now() - datetime.timedelta(hours=3) end = datetime.datetime.now() mock = MockModel.objects.get(pk=1) mock.pub_date = datetime.datetime.now() - datetime.timedelta(hours=2) mock.save() self.assertEqual(MockModel.objects.filter(pub_date__range=(start, end)).count(), 1) call_command('update_index', age=3, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 1) def test_age_with_time_zones(self): """Haystack should use django.utils.timezone.now""" from django.utils.timezone import now as django_now from haystack.management.commands.update_index import now as haystack_now self.assertIs(haystack_now, django_now, msg="update_index should use django.utils.timezone.now") with patch("haystack.management.commands.update_index.now") as m: m.return_value = django_now() self.test_age() assert m.called def test_dates(self): call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) start = datetime.datetime.now() - datetime.timedelta(hours=5, minutes=30) end = 
datetime.datetime.now() - datetime.timedelta(hours=2) mock_1 = MockModel.objects.get(pk=1) mock_1.pub_date = datetime.datetime.now() - datetime.timedelta(hours=5, minutes=1) mock_1.save() mock_2 = MockModel.objects.get(pk=2) mock_2.pub_date = datetime.datetime.now() - datetime.timedelta(hours=3) mock_2.save() mock_3 = MockModel.objects.get(pk=3) mock_3.pub_date = datetime.datetime.now() - datetime.timedelta(hours=1) mock_3.save() self.assertEqual(MockModel.objects.filter(pub_date__range=(start, end)).count(), 2) call_command('update_index', start_date=start.isoformat(), end_date=end.isoformat(), verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 2) def test_multiprocessing(self): call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', verbosity=2, workers=2, batchsize=5) self.verify_indexed_documents() call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', verbosity=2, workers=2, batchsize=5, commit=False) self.assertEqual(self.solr.search('*:*').hits, 0) def test_build_schema_wrong_backend(self): settings.HAYSTACK_CONNECTIONS['whoosh'] = {'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine', 'PATH': mkdtemp(prefix='dummy-path-'), } connections['whoosh']._index = self.ui self.assertRaises(ImproperlyConfigured, call_command, 'build_solr_schema', using='whoosh') def test_build_schema(self): # Stow. 
oldhdf = constants.DOCUMENT_FIELD oldui = connections['solr'].get_unified_index() oldurl = settings.HAYSTACK_CONNECTIONS['solr']['URL'] needle = 'Th3S3cr3tK3y' constants.DOCUMENT_FIELD = needle # Force index to use new key for document_fields settings.HAYSTACK_CONNECTIONS['solr']['URL'] = settings.HAYSTACK_CONNECTIONS['solr']['URL'].rsplit('/', 1)[0] + '/mgmnt' ui = UnifiedIndex() ui.build(indexes=[SolrMockSecretKeySearchIndex()]) connections['solr']._index = ui rendered_file = StringIO() script_dir = os.path.realpath(os.path.dirname(__file__)) conf_dir = os.path.join(script_dir, 'server', 'solr', 'server', 'solr', 'mgmnt', 'conf') schema_file = os.path.join(conf_dir, 'schema.xml') solrconfig_file = os.path.join(conf_dir, 'solrconfig.xml') self.assertTrue(os.path.isdir(conf_dir), msg='Expected %s to be a directory' % conf_dir) call_command('build_solr_schema', using='solr', stdout=rendered_file) contents = rendered_file.getvalue() self.assertGreater(contents.find("name=\"%s" % needle), -1) call_command('build_solr_schema', using='solr', configure_directory=conf_dir) with open(schema_file) as s: self.assertGreater(s.read().find("name=\"%s" % needle), -1) with open(solrconfig_file) as s: self.assertGreater(s.read().find("name=\"df\">%s" % needle), -1) self.assertTrue(os.path.isfile(os.path.join(conf_dir, 'managed-schema.old'))) call_command('build_solr_schema', using='solr', reload_core=True) os.rename(schema_file, '%s.bak' % schema_file) self.assertRaises(CommandError, call_command, 'build_solr_schema', using='solr', reload_core=True) call_command('build_solr_schema', using='solr', filename=schema_file) with open(schema_file) as s: self.assertGreater(s.read().find("name=\"%s" % needle), -1) # reset constants.DOCUMENT_FIELD = oldhdf connections['solr']._index = oldui settings.HAYSTACK_CONNECTIONS['solr']['URL'] = oldurl class AppModelManagementCommandTestCase(TestCase): fixtures = ['base_data', 'bulk_data.json'] def setUp(self): 
super(AppModelManagementCommandTestCase, self).setUp() self.solr = pysolr.Solr(settings.HAYSTACK_CONNECTIONS['solr']['URL']) # Stow. self.old_ui = connections['solr'].get_unified_index() self.ui = UnifiedIndex() self.smmi = SolrMockSearchIndex() self.smtmi = SolrMockTagSearchIndex() self.ui.build(indexes=[self.smmi, self.smtmi]) connections['solr']._index = self.ui def tearDown(self): connections['solr']._index = self.old_ui super(AppModelManagementCommandTestCase, self).tearDown() def test_app_model_variations(self): call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 25) call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', 'core', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 25) call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) with self.assertRaises(ImproperlyConfigured): call_command('update_index', 'fake_app_thats_not_there') call_command('update_index', 'core', 'discovery', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 25) call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', 'discovery', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', 'core.MockModel', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 23) call_command('clear_index', interactive=False, verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', 'core.MockTag', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 2) call_command('clear_index', interactive=False, verbosity=0) 
self.assertEqual(self.solr.search('*:*').hits, 0) call_command('update_index', 'core.MockTag', 'core.MockModel', verbosity=0) self.assertEqual(self.solr.search('*:*').hits, 25) django-haystack-2.8.0/test_haystack/solr_tests/test_solr_query.py000066400000000000000000000226521325051407000254600ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime from django.test import TestCase from haystack import connections from haystack.inputs import AltParser, Exact from haystack.models import SearchResult from haystack.query import SearchQuerySet, SQ from ..core.models import AnotherMockModel, MockModel class SolrSearchQueryTestCase(TestCase): fixtures = ['base_data'] def setUp(self): super(SolrSearchQueryTestCase, self).setUp() self.sq = connections['solr'].get_query() def test_build_query_all(self): self.assertEqual(self.sq.build_query(), '*:*') def test_build_query_single_word(self): self.sq.add_filter(SQ(content='hello')) self.assertEqual(self.sq.build_query(), '(hello)') def test_build_query_boolean(self): self.sq.add_filter(SQ(content=True)) self.assertEqual(self.sq.build_query(), '(true)') def test_build_query_datetime(self): self.sq.add_filter(SQ(content=datetime.datetime(2009, 5, 8, 11, 28))) self.assertEqual(self.sq.build_query(), '(2009-05-08T11:28:00Z)') def test_build_query_multiple_words_and(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_filter(SQ(content='world')) self.assertEqual(self.sq.build_query(), '((hello) AND (world))') def test_build_query_multiple_words_not(self): self.sq.add_filter(~SQ(content='hello')) self.sq.add_filter(~SQ(content='world')) self.assertEqual(self.sq.build_query(), '(NOT ((hello)) AND NOT ((world)))') def test_build_query_multiple_words_or(self): self.sq.add_filter(~SQ(content='hello')) self.sq.add_filter(SQ(content='hello'), use_or=True) self.assertEqual(self.sq.build_query(), '(NOT ((hello)) OR (hello))') def 
test_build_query_multiple_words_mixed(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(content='hello'), use_or=True) self.sq.add_filter(~SQ(content='world')) self.assertEqual(self.sq.build_query(), u'(((why) OR (hello)) AND NOT ((world)))') def test_build_query_phrase(self): self.sq.add_filter(SQ(content='hello world')) self.assertEqual(self.sq.build_query(), '(hello AND world)') self.sq.add_filter(SQ(content__exact='hello world')) self.assertEqual(self.sq.build_query(), u'((hello AND world) AND ("hello world"))') def test_build_query_boost(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_boost('world', 5) self.assertEqual(self.sq.build_query(), "(hello) world^5") def test_correct_exact(self): self.sq.add_filter(SQ(content=Exact('hello world'))) self.assertEqual(self.sq.build_query(), '("hello world")') def test_build_query_multiple_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__lte=Exact('2009-02-10 01:59:00'))) self.sq.add_filter(SQ(author__gt='daniel')) self.sq.add_filter(SQ(created__lt=Exact('2009-02-12 12:13:00'))) self.sq.add_filter(SQ(title__gte='B')) self.sq.add_filter(SQ(id__in=[1, 2, 3])) self.sq.add_filter(SQ(rating__range=[3, 5])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:([* TO "2009-02-10 01:59:00"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12 12:13:00"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') def test_build_complex_altparser_query(self): self.sq.add_filter(SQ(content=AltParser('dismax', "Don't panic", qf='text'))) self.sq.add_filter(SQ(pub_date__lte=Exact('2009-02-10 01:59:00'))) self.sq.add_filter(SQ(author__gt='daniel')) self.sq.add_filter(SQ(created__lt=Exact('2009-02-12 12:13:00'))) self.sq.add_filter(SQ(title__gte='B')) self.sq.add_filter(SQ(id__in=[1, 2, 3])) self.sq.add_filter(SQ(rating__range=[3, 5])) query = self.sq.build_query() self.assertTrue(u'(_query_:"{!dismax qf=text}Don\'t panic")' in 
query) self.assertTrue(u'pub_date:([* TO "2009-02-10 01:59:00"])' in query) self.assertTrue(u'author:({"daniel" TO *})' in query) self.assertTrue(u'created:({* TO "2009-02-12 12:13:00"})' in query) self.assertTrue(u'title:(["B" TO *])' in query) self.assertTrue(u'id:("1" OR "2" OR "3")' in query) self.assertTrue(u'rating:(["3" TO "5"])' in query) def test_build_query_multiple_filter_types_with_datetimes(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59, 0))) self.sq.add_filter(SQ(author__gt='daniel')) self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13, 0))) self.sq.add_filter(SQ(title__gte='B')) self.sq.add_filter(SQ(id__in=[1, 2, 3])) self.sq.add_filter(SQ(rating__range=[3, 5])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:([* TO "2009-02-10T01:59:00Z"]) AND author:({"daniel" TO *}) AND created:({* TO "2009-02-12T12:13:00Z"}) AND title:(["B" TO *]) AND id:("1" OR "2" OR "3") AND rating:(["3" TO "5"]))') def test_build_query_in_filter_multiple_words(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"])) self.assertEqual(self.sq.build_query(), u'((why) AND title:("A Famous Paper" OR "An Infamous Article"))') def test_build_query_in_filter_datetime(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:("2009-07-06T01:56:21Z"))') def test_build_query_in_with_set(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=set(["A Famous Paper", "An Infamous Article"]))) query = self.sq.build_query() self.assertTrue(u'(why)' in query) # Because ordering in Py3 is now random. 
if 'title:("A ' in query: self.assertTrue(u'title:("A Famous Paper" OR "An Infamous Article")' in query) else: self.assertTrue(u'title:("An Infamous Article" OR "A Famous Paper")' in query) def test_build_query_with_contains(self): self.sq.add_filter(SQ(content='circular')) self.sq.add_filter(SQ(title__contains='haystack')) self.assertEqual(self.sq.build_query(), u'((circular) AND title:(*haystack*))') def test_build_query_with_endswith(self): self.sq.add_filter(SQ(content='circular')) self.sq.add_filter(SQ(title__endswith='haystack')) self.assertEqual(self.sq.build_query(), u'((circular) AND title:(*haystack))') def test_build_query_wildcard_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__startswith='haystack')) self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack*))') def test_build_query_fuzzy_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__fuzzy='haystack')) self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack~))') def test_clean(self): self.assertEqual(self.sq.clean('hello world'), 'hello world') self.assertEqual(self.sq.clean('hello AND world'), 'hello and world') self.assertEqual(self.sq.clean('hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? : \ / world'), 'hello and or not to \\+ \\- \\&& \\|| \\! \\( \\) \\{ \\} \\[ \\] \\^ \\" \\~ \\* \\? \\: \\\\ \\/ world') self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), 'so please NOTe i am in a bAND and bORed') def test_build_query_with_models(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_model(MockModel) self.assertEqual(self.sq.build_query(), '(hello)') self.sq.add_model(AnotherMockModel) self.assertEqual(self.sq.build_query(), u'(hello)') def test_set_result_class(self): # Assert that we're defaulting to ``SearchResult``. self.assertTrue(issubclass(self.sq.result_class, SearchResult)) # Custom class. 
class IttyBittyResult(object): pass self.sq.set_result_class(IttyBittyResult) self.assertTrue(issubclass(self.sq.result_class, IttyBittyResult)) # Reset to default. self.sq.set_result_class(None) self.assertTrue(issubclass(self.sq.result_class, SearchResult)) def test_in_filter_values_list(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=MockModel.objects.values_list('id', flat=True))) self.assertEqual(self.sq.build_query(), u'((why) AND title:("1" OR "2" OR "3"))') def test_narrow_sq(self): sqs = SearchQuerySet(using='solr').narrow(SQ(foo='moof')) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.narrow_queries), 1) self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') def test_query__in(self): sqs = SearchQuerySet(using='solr').filter(id__in=[1,2,3]) self.assertEqual(sqs.query.build_query(), u'id:("1" OR "2" OR "3")') def test_query__in_empty_list(self): """Confirm that an empty list avoids a Solr exception""" sqs = SearchQuerySet(using='solr').filter(id__in=[]) self.assertEqual(sqs.query.build_query(), u'id:(!*:*)') django-haystack-2.8.0/test_haystack/solr_tests/test_templatetags.py000066400000000000000000000047761325051407000257550ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import unittest from django.template import Context, Template from django.test import TestCase from mock import call, patch from ..core.models import MockModel @patch("haystack.templatetags.more_like_this.SearchQuerySet") class MoreLikeThisTagTestCase(TestCase): fixtures = ['base_data'] def render(self, template, context): # Why on Earth does Django not have a TemplateTestCase yet? 
t = Template(template) c = Context(context) return t.render(c) def test_more_like_this_without_limit(self, mock_sqs): mock_model = MockModel.objects.get(pk=3) template = """{% load more_like_this %}{% more_like_this entry as related_content %}{% for rc in related_content %}{{ rc.id }}{% endfor %}""" context = {'entry': mock_model} mlt = mock_sqs.return_value.more_like_this mlt.return_value = [{"id": "test_id"}] self.assertEqual("test_id", self.render(template, context)) mlt.assert_called_once_with(mock_model) def test_more_like_this_with_limit(self, mock_sqs): mock_model = MockModel.objects.get(pk=3) template = """{% load more_like_this %}{% more_like_this entry as related_content limit 5 %}{% for rc in related_content %}{{ rc.id }}{% endfor %}""" context = {'entry': mock_model} mlt = mock_sqs.return_value.more_like_this mlt.return_value.__getitem__.return_value = [{"id": "test_id"}] self.assertEqual("test_id", self.render(template, context)) mlt.assert_called_once_with(mock_model) mock_sqs.assert_has_calls([call().more_like_this(mock_model), call().more_like_this().__getitem__(slice(None, 5))], any_order=True) # FIXME: https://github.com/toastdriven/django-haystack/issues/1069 @unittest.expectedFailure def test_more_like_this_for_model(self, mock_sqs): mock_model = MockModel.objects.get(pk=3) template = """{% load more_like_this %}{% more_like_this entry as related_content for "core.mock" limit 5 %}{% for rc in related_content %}{{ rc.id }}{% endfor %}""" context = {'entry': mock_model} self.render(template, context) mock_sqs.assert_has_calls([call().models().more_like_this(mock_model), call().models().more_like_this().__getitem__(slice(None, 5))], any_order=True) django-haystack-2.8.0/test_haystack/spatial/000077500000000000000000000000001325051407000210705ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/spatial/__init__.py000066400000000000000000000002451325051407000232020ustar00rootroot00000000000000# encoding: utf-8 from __future__ import 
absolute_import, division, print_function, unicode_literals from ..utils import check_solr def setup(): check_solr() django-haystack-2.8.0/test_haystack/spatial/fixtures/000077500000000000000000000000001325051407000227415ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/spatial/fixtures/sample_spatial_data.json000066400000000000000000000061531325051407000276300ustar00rootroot00000000000000[ { "pk": 1, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.971955031423384, "longitude": -95.23573637008667, "comment": "Man, I love the coffee at LPT!", "created": "2011-12-13 09:12:23" } }, { "pk": 2, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.967667537449294, "longitude": -95.23528575897217, "comment": "At the Pig for coffee. No one is here.", "created": "2011-12-13 10:21:23" } }, { "pk": 3, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.971955031423384, "longitude": -95.23573637008667, "comment": "Back to LPT's coffee.", "created": "2011-12-14 14:53:23" } }, { "pk": 4, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.92776639117804, "longitude": -95.2584171295166, "comment": "I hate the lines at the post office.", "created": "2011-12-14 10:01:23" } }, { "pk": 5, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.96531514451104, "longitude": -95.23622989654541, "comment": "ZOMGEncore!", "created": "2011-12-14 12:30:23" } }, { "pk": 6, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.97110422641184, "longitude": -95.23511409759521, "comment": "Trying a little Java Break coffee to get the day going.", "created": "2011-12-15 08:44:23" } }, { "pk": 7, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.9128152, "longitude": -94.6373083, "comment": "Apple Store! 
And they have coffee!", "created": "2011-12-15 11:05:23" } }, { "pk": 8, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.97143787665407, "longitude": -95.23622989654541, "comment": "4bucks coffee run. :/", "created": "2011-12-16 10:10:23" } }, { "pk": 9, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.97080393984995, "longitude": -95.23573637008667, "comment": "Time for lunch at Rudy's.", "created": "2011-12-16 01:23:23" } }, { "pk": 10, "model": "spatial.checkin", "fields": { "username": "daniel", "latitude": 38.92588008485826, "longitude": -95.2640175819397, "comment": "At Target. Again.", "created": "2011-12-16 19:51:23" } } ] django-haystack-2.8.0/test_haystack/spatial/models.py000066400000000000000000000020551325051407000227270ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime from django.db import models class Checkin(models.Model): username = models.CharField(max_length=255) # We're going to do some non-GeoDjango action, since the setup is # complex enough. You could just as easily do: # # location = models.PointField() # # ...and your ``search_indexes.py`` could be less complex. latitude = models.FloatField() longitude = models.FloatField() comment = models.CharField(max_length=140, blank=True, default='', help_text='Say something pithy.') created = models.DateTimeField(default=datetime.datetime.now) class Meta: ordering = ['-created'] # Again, with GeoDjango, this would be unnecessary. def get_location(self): # Nothing special about this Point, but ensure that's we don't have to worry # about import paths. 
from haystack.utils.geo import Point pnt = Point(self.longitude, self.latitude) return pnt django-haystack-2.8.0/test_haystack/spatial/search_indexes.py000066400000000000000000000015071325051407000244310ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from haystack import indexes from .models import Checkin class CheckinSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) username = indexes.CharField(model_attr='username') comment = indexes.CharField(model_attr='comment') # Again, if you were using GeoDjango, this could be just: # location = indexes.LocationField(model_attr='location') location = indexes.LocationField(model_attr='get_location') created = indexes.DateTimeField(model_attr='created') def get_model(self): return Checkin def prepare_text(self, obj): # Because I don't feel like creating a template just for this. return '\n'.join([obj.comment, obj.username]) django-haystack-2.8.0/test_haystack/spatial/test_spatial.py000066400000000000000000000235461325051407000241500ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.contrib.gis.geos import GEOSGeometry from django.test import TestCase from haystack import connections from haystack.exceptions import SpatialError from haystack.query import SearchQuerySet from haystack.utils.geo import (D, ensure_distance, ensure_geometry, ensure_point, ensure_wgs84, generate_bounding_box, Point) from .models import Checkin class SpatialUtilitiesTestCase(TestCase): def test_ensure_geometry(self): self.assertRaises(SpatialError, ensure_geometry, [38.97127105172941, -95.23592948913574]) ensure_geometry(GEOSGeometry('POLYGON((-95 38, -96 40, -97 42, -95 38))')) ensure_geometry(GEOSGeometry('POINT(-95.23592948913574 38.97127105172941)')) ensure_geometry(Point(-95.23592948913574, 38.97127105172941)) def 
test_ensure_point(self): self.assertRaises(SpatialError, ensure_point, [38.97127105172941, -95.23592948913574]) self.assertRaises(SpatialError, ensure_point, GEOSGeometry('POLYGON((-95 38, -96 40, -97 42, -95 38))')) ensure_point(Point(-95.23592948913574, 38.97127105172941)) def test_ensure_wgs84(self): self.assertRaises(SpatialError, ensure_wgs84, GEOSGeometry('POLYGON((-95 38, -96 40, -97 42, -95 38))')) orig_pnt = Point(-95.23592948913574, 38.97127105172941) std_pnt = ensure_wgs84(orig_pnt) self.assertEqual(orig_pnt.srid, None) self.assertEqual(std_pnt.srid, 4326) self.assertEqual(std_pnt.x, -95.23592948913574) self.assertEqual(std_pnt.y, 38.97127105172941) orig_pnt = Point(-95.23592948913574, 38.97127105172941) orig_pnt.srid = 2805 std_pnt = ensure_wgs84(orig_pnt) self.assertEqual(orig_pnt.srid, 2805) self.assertEqual(std_pnt.srid, 4326) # These should be different, since it got transformed. self.assertNotEqual(std_pnt.x, -95.23592948913574) self.assertNotEqual(std_pnt.y, 38.97127105172941) def test_ensure_distance(self): self.assertRaises(SpatialError, ensure_distance, [38.97127105172941, -95.23592948913574]) ensure_distance(D(mi=5)) def test_generate_bounding_box(self): downtown_bottom_left = Point(-95.23947, 38.9637903) downtown_top_right = Point(-95.23362278938293, 38.973081081164715) ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(downtown_bottom_left, downtown_top_right) self.assertEqual(min_lat, 38.9637903) self.assertEqual(min_lng, -95.23947) self.assertEqual(max_lat, 38.973081081164715) self.assertEqual(max_lng, -95.23362278938293) def test_generate_bounding_box_crossing_line_date(self): downtown_bottom_left = Point(95.23947, 38.9637903) downtown_top_right = Point(-95.23362278938293, 38.973081081164715) ((south, west), (north, east)) = generate_bounding_box(downtown_bottom_left, downtown_top_right) self.assertEqual(south, 38.9637903) self.assertEqual(west, 95.23947) self.assertEqual(north, 38.973081081164715) self.assertEqual(east, 
-95.23362278938293) class SpatialSolrTestCase(TestCase): fixtures = ['sample_spatial_data.json'] using = 'solr' def setUp(self): super(SpatialSolrTestCase, self).setUp() self.ui = connections[self.using].get_unified_index() self.checkindex = self.ui.get_index(Checkin) self.checkindex.reindex(using=self.using) self.sqs = SearchQuerySet().using(self.using) self.downtown_pnt = Point(-95.23592948913574, 38.97127105172941) self.downtown_bottom_left = Point(-95.23947, 38.9637903) self.downtown_top_right = Point(-95.23362278938293, 38.973081081164715) self.lawrence_bottom_left = Point(-95.345535, 39.002643) self.lawrence_top_right = Point(-95.202713, 38.923626) def tearDown(self): self.checkindex.clear(using=self.using) super(SpatialSolrTestCase, self).setUp() def test_indexing(self): # Make sure the indexed data looks correct. first = Checkin.objects.get(pk=1) sqs = self.sqs.models(Checkin).filter(django_id=first.pk) self.assertEqual(sqs.count(), 1) self.assertEqual(sqs[0].username, first.username) # Make sure we've got a proper ``Point`` object. self.assertAlmostEqual(sqs[0].location.coords[0], first.longitude) self.assertAlmostEqual(sqs[0].location.coords[1], first.latitude) # Double-check, to make sure there was nothing accidentally copied # between instances. 
second = Checkin.objects.get(pk=2) self.assertNotEqual(second.latitude, first.latitude) sqs = self.sqs.models(Checkin).filter(django_id=second.pk) self.assertEqual(sqs.count(), 1) self.assertEqual(sqs[0].username, second.username) self.assertAlmostEqual(sqs[0].location.coords[0], second.longitude) self.assertAlmostEqual(sqs[0].location.coords[1], second.latitude) def test_within(self): self.assertEqual(self.sqs.all().count(), 10) sqs = self.sqs.within('location', self.downtown_bottom_left, self.downtown_top_right) self.assertEqual(sqs.count(), 7) sqs = self.sqs.within('location', self.lawrence_bottom_left, self.lawrence_top_right) self.assertEqual(sqs.count(), 9) def test_dwithin(self): self.assertEqual(self.sqs.all().count(), 10) sqs = self.sqs.dwithin('location', self.downtown_pnt, D(mi=0.1)) self.assertEqual(sqs.count(), 5) sqs = self.sqs.dwithin('location', self.downtown_pnt, D(mi=0.5)) self.assertEqual(sqs.count(), 7) sqs = self.sqs.dwithin('location', self.downtown_pnt, D(mi=100)) self.assertEqual(sqs.count(), 10) def test_distance_added(self): sqs = self.sqs.within('location', self.downtown_bottom_left, self.downtown_top_right).distance('location', self.downtown_pnt) self.assertEqual(sqs.count(), 7) self.assertAlmostEqual(sqs[0].distance.mi, 0.01985226) self.assertAlmostEqual(sqs[1].distance.mi, 0.03385863) self.assertAlmostEqual(sqs[2].distance.mi, 0.04539100) self.assertAlmostEqual(sqs[3].distance.mi, 0.04831436) self.assertAlmostEqual(sqs[4].distance.mi, 0.41116546) self.assertAlmostEqual(sqs[5].distance.mi, 0.25098114) self.assertAlmostEqual(sqs[6].distance.mi, 0.04831436) sqs = self.sqs.dwithin('location', self.downtown_pnt, D(mi=0.1)).distance('location', self.downtown_pnt) self.assertEqual(sqs.count(), 5) self.assertAlmostEqual(sqs[0].distance.mi, 0.01985226) self.assertAlmostEqual(sqs[1].distance.mi, 0.03385863) self.assertAlmostEqual(sqs[2].distance.mi, 0.04539100) self.assertAlmostEqual(sqs[3].distance.mi, 0.04831436) 
self.assertAlmostEqual(sqs[4].distance.mi, 0.04831436) def test_order_by_distance(self): sqs = self.sqs.within('location', self.downtown_bottom_left, self.downtown_top_right).distance('location', self.downtown_pnt).order_by('distance') self.assertEqual(sqs.count(), 7) self.assertEqual([result.pk for result in sqs], ['8', '9', '6', '3', '1', '2', '5']) self.assertEqual(["%0.04f" % result.distance.mi for result in sqs], ['0.0199', '0.0339', '0.0454', '0.0483', '0.0483', '0.2510', '0.4112']) sqs = self.sqs.dwithin('location', self.downtown_pnt, D(mi=0.1)).distance('location', self.downtown_pnt).order_by('distance') self.assertEqual(sqs.count(), 5) self.assertEqual([result.pk for result in sqs], ['8', '9', '6', '3', '1']) self.assertEqual(["%0.04f" % result.distance.mi for result in sqs], ['0.0199', '0.0339', '0.0454', '0.0483', '0.0483']) sqs = self.sqs.dwithin('location', self.downtown_pnt, D(mi=0.1)).distance('location', self.downtown_pnt).order_by('-distance') self.assertEqual(sqs.count(), 5) self.assertEqual([result.pk for result in sqs], ['3', '1', '6', '9', '8']) self.assertEqual(["%0.04f" % result.distance.mi for result in sqs], ['0.0483', '0.0483', '0.0454', '0.0339', '0.0199']) def test_complex(self): sqs = self.sqs.auto_query('coffee').within('location', self.downtown_bottom_left, self.downtown_top_right).distance('location', self.downtown_pnt).order_by('distance') self.assertEqual(sqs.count(), 5) self.assertEqual([result.pk for result in sqs], ['8', '6', '3', '1', '2']) self.assertEqual(["%0.04f" % result.distance.mi for result in sqs], ['0.0199', '0.0454', '0.0483', '0.0483', '0.2510']) sqs = self.sqs.auto_query('coffee').dwithin('location', self.downtown_pnt, D(mi=0.1)).distance('location', self.downtown_pnt).order_by('distance') self.assertEqual(sqs.count(), 4) self.assertEqual([result.pk for result in sqs], ['8', '6', '3', '1']) self.assertEqual(["%0.04f" % result.distance.mi for result in sqs], ['0.0199', '0.0454', '0.0483', '0.0483']) sqs = 
self.sqs.auto_query('coffee').dwithin('location', self.downtown_pnt, D(mi=0.1)).distance('location', self.downtown_pnt).order_by('-distance') self.assertEqual(sqs.count(), 4) self.assertEqual([result.pk for result in sqs], ['3', '1', '6', '8']) self.assertEqual(["%0.04f" % result.distance.mi for result in sqs], ['0.0483', '0.0483', '0.0454', '0.0199']) sqs = self.sqs.auto_query('coffee').within('location', self.downtown_bottom_left, self.downtown_top_right).distance('location', self.downtown_pnt).order_by('-created') self.assertEqual(sqs.count(), 5) self.assertEqual([result.pk for result in sqs], ['8', '6', '3', '2', '1']) sqs = self.sqs.auto_query('coffee').dwithin('location', self.downtown_pnt, D(mi=0.1)).distance('location', self.downtown_pnt).order_by('-created') self.assertEqual(sqs.count(), 4) self.assertEqual([result.pk for result in sqs], ['8', '6', '3', '1']) django-haystack-2.8.0/test_haystack/test_altered_internal_names.py000066400000000000000000000066321325051407000255520ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf import settings from django.test import TestCase from test_haystack.core.models import AnotherMockModel, MockModel from test_haystack.utils import check_solr from haystack import connection_router, connections, constants, indexes from haystack.management.commands.build_solr_schema import Command from haystack.query import SQ from haystack.utils.loading import UnifiedIndex class MockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='foo', document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class AlteredInternalNamesTestCase(TestCase): def setUp(self): check_solr() super(AlteredInternalNamesTestCase, self).setUp() self.old_ui = connections['solr'].get_unified_index() ui = UnifiedIndex() 
ui.build(indexes=[MockModelSearchIndex()]) connections['solr']._index = ui constants.ID = 'my_id' constants.DJANGO_CT = 'my_django_ct' constants.DJANGO_ID = 'my_django_id' def tearDown(self): constants.ID = 'id' constants.DJANGO_CT = 'django_ct' constants.DJANGO_ID = 'django_id' connections['solr']._index = self.old_ui super(AlteredInternalNamesTestCase, self).tearDown() def test_altered_names(self): sq = connections['solr'].get_query() sq.add_filter(SQ(content='hello')) sq.add_model(MockModel) self.assertEqual(sq.build_query(), u'(hello)') sq.add_model(AnotherMockModel) self.assertEqual(sq.build_query(), u'(hello)') def test_solr_schema(self): command = Command() context_data = command.build_context(using='solr') self.assertEqual(len(context_data), 6) self.assertEqual(context_data['DJANGO_ID'], 'my_django_id') self.assertEqual(context_data['content_field_name'], 'text') self.assertEqual(context_data['DJANGO_CT'], 'my_django_ct') self.assertEqual(context_data['default_operator'], 'AND') self.assertEqual(context_data['ID'], 'my_id') self.assertEqual(len(context_data['fields']), 3) self.assertEqual(sorted(context_data['fields'], key=lambda x: x['field_name']), [ { 'indexed': 'true', 'type': 'text_en', 'stored': 'true', 'field_name': 'name', 'multi_valued': 'false' }, { 'indexed': 'true', 'type': 'date', 'stored': 'true', 'field_name': 'pub_date', 'multi_valued': 'false' }, { 'indexed': 'true', 'type': 'text_en', 'stored': 'true', 'field_name': 'text', 'multi_valued': 'false' }, ]) schema_xml = command.build_template(using='solr') self.assertTrue('my_id' in schema_xml) self.assertTrue('' in schema_xml) self.assertTrue('' in schema_xml) django-haystack-2.8.0/test_haystack/test_app_loading.py000066400000000000000000000044511325051407000233250ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from types import GeneratorType, ModuleType from django.urls import reverse from django.test import 
TestCase from haystack.utils import app_loading class AppLoadingTests(TestCase): def test_load_apps(self): apps = app_loading.haystack_load_apps() self.assertIsInstance(apps, (list, GeneratorType)) self.assertIn('hierarchal_app_django', apps) self.assertNotIn('test_app_without_models', apps, msg='haystack_load_apps should exclude apps without defined models') def test_get_app_modules(self): app_modules = app_loading.haystack_get_app_modules() self.assertIsInstance(app_modules, (list, GeneratorType)) for i in app_modules: self.assertIsInstance(i, ModuleType) def test_get_models_all(self): models = app_loading.haystack_get_models('core') self.assertIsInstance(models, (list, GeneratorType)) def test_get_models_specific(self): from test_haystack.core.models import MockModel models = app_loading.haystack_get_models('core.MockModel') self.assertIsInstance(models, (list, GeneratorType)) self.assertListEqual(models, [MockModel]) def test_hierarchal_app_get_models(self): models = app_loading.haystack_get_models('hierarchal_app_django') self.assertIsInstance(models, (list, GeneratorType)) self.assertSetEqual(set(str(i._meta) for i in models), set(('hierarchal_app_django.hierarchalappsecondmodel', 'hierarchal_app_django.hierarchalappmodel'))) def test_hierarchal_app_specific_model(self): models = app_loading.haystack_get_models('hierarchal_app_django.HierarchalAppModel') self.assertIsInstance(models, (list, GeneratorType)) self.assertSetEqual(set(str(i._meta) for i in models), set(('hierarchal_app_django.hierarchalappmodel', ))) class AppWithoutModelsTests(TestCase): # Confirm that everything works if an app is enabled def test_simple_view(self): url = reverse('app-without-models:simple-view') resp = self.client.get(url) self.assertEqual(resp.content.decode('utf-8'), 'OK') 
django-haystack-2.8.0/test_haystack/test_app_using_appconfig/000077500000000000000000000000001325051407000245055ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_using_appconfig/__init__.py000066400000000000000000000002571325051407000266220ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals default_app_config = 'test_app_using_appconfig.apps.SimpleTestAppConfig'django-haystack-2.8.0/test_haystack/test_app_using_appconfig/apps.py000066400000000000000000000004071325051407000260230ustar00rootroot00000000000000from __future__ import absolute_import, division, print_function, unicode_literals from django.apps import AppConfig class SimpleTestAppConfig(AppConfig): name = 'test_haystack.test_app_using_appconfig' verbose_name = "Simple test app using AppConfig" django-haystack-2.8.0/test_haystack/test_app_using_appconfig/migrations/000077500000000000000000000000001325051407000266615ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_using_appconfig/migrations/0001_initial.py000066400000000000000000000011341325051407000313230ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals from django.db import migrations, models class Migration(migrations.Migration): dependencies = [ ] operations = [ migrations.CreateModel( name='MicroBlogPost', fields=[ ('id', models.AutoField(verbose_name='ID', serialize=False, auto_created=True, primary_key=True)), ('text', models.CharField(max_length=140)), ], options={ }, bases=(models.Model,), ), ] django-haystack-2.8.0/test_haystack/test_app_using_appconfig/migrations/__init__.py000066400000000000000000000000001325051407000307600ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_using_appconfig/models.py000066400000000000000000000003301325051407000263360ustar00rootroot00000000000000# encoding: utf-8 from __future__ 
import absolute_import, division, print_function, unicode_literals from django.db.models import CharField, Model class MicroBlogPost(Model): text = CharField(max_length=140) django-haystack-2.8.0/test_haystack/test_app_using_appconfig/search_indexes.py000066400000000000000000000005671325051407000300530ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from haystack import indexes from .models import MicroBlogPost class MicroBlogSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=False, model_attr='text') def get_model(self): return MicroBlogPost django-haystack-2.8.0/test_haystack/test_app_using_appconfig/tests.py000066400000000000000000000007011325051407000262170ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from .models import MicroBlogPost class AppConfigTests(TestCase): def test_index_collection(self): from haystack import connections unified_index = connections['default'].get_unified_index() models = unified_index.get_indexed_models() self.assertIn(MicroBlogPost, models) django-haystack-2.8.0/test_haystack/test_app_with_hierarchy/000077500000000000000000000000001325051407000243435ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_with_hierarchy/__init__.py000066400000000000000000000001161325051407000264520ustar00rootroot00000000000000"""Test app with multiple hierarchy levels above the actual models.py file""" 
django-haystack-2.8.0/test_haystack/test_app_with_hierarchy/contrib/000077500000000000000000000000001325051407000260035ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_with_hierarchy/contrib/__init__.py000066400000000000000000000000001325051407000301020ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_with_hierarchy/contrib/django/000077500000000000000000000000001325051407000272455ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_with_hierarchy/contrib/django/__init__.py000066400000000000000000000000001325051407000313440ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_with_hierarchy/contrib/django/hierarchal_app_django/000077500000000000000000000000001325051407000335315ustar00rootroot00000000000000__init__.py000066400000000000000000000000001325051407000355510ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_with_hierarchy/contrib/django/hierarchal_app_djangomodels.py000066400000000000000000000004751325051407000353150ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_with_hierarchy/contrib/django/hierarchal_app_django# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.db.models import BooleanField, CharField, Model class HierarchalAppModel(Model): enabled = BooleanField(default=True) class HierarchalAppSecondModel(Model): title = CharField(max_length=16) django-haystack-2.8.0/test_haystack/test_app_without_models/000077500000000000000000000000001325051407000244005ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_without_models/__init__.py000066400000000000000000000000001325051407000264770ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/test_app_without_models/urls.py000066400000000000000000000003661325051407000257440ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, 
print_function, unicode_literals from django.conf.urls import url from .views import simple_view urlpatterns = [ url(r'^simple-view$', simple_view, name='simple-view') ] django-haystack-2.8.0/test_haystack/test_app_without_models/views.py000066400000000000000000000003061325051407000261060ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.http import HttpResponse def simple_view(request): return HttpResponse('OK') django-haystack-2.8.0/test_haystack/test_backends.py000066400000000000000000000045111325051407000226170ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import warnings from django.core.exceptions import ImproperlyConfigured from django.test import TestCase from haystack.utils import loading class LoadBackendTestCase(TestCase): def test_load_solr(self): try: import pysolr except ImportError: warnings.warn("Pysolr doesn't appear to be installed. Unable to test loading the Solr backend.") return backend = loading.load_backend('haystack.backends.solr_backend.SolrEngine') self.assertEqual(backend.__name__, 'SolrEngine') def test_load_whoosh(self): try: import whoosh except ImportError: warnings.warn("Whoosh doesn't appear to be installed. Unable to test loading the Whoosh backend.") return backend = loading.load_backend('haystack.backends.whoosh_backend.WhooshEngine') self.assertEqual(backend.__name__, 'WhooshEngine') def test_load_elasticsearch(self): try: import elasticsearch except ImportError: warnings.warn("elasticsearch-py doesn't appear to be installed. 
Unable to test loading the ElasticSearch backend.") return backend = loading.load_backend('haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine') self.assertEqual(backend.__name__, 'ElasticsearchSearchEngine') def test_load_simple(self): backend = loading.load_backend('haystack.backends.simple_backend.SimpleEngine') self.assertEqual(backend.__name__, 'SimpleEngine') def test_load_nonexistent(self): try: backend = loading.load_backend('foobar') self.fail() except ImproperlyConfigured as e: self.assertEqual(str(e), "The provided backend 'foobar' is not a complete Python path to a BaseEngine subclass.") try: backend = loading.load_backend('foobar.FooEngine') self.fail() except ImportError as e: pass try: backend = loading.load_backend('haystack.backends.simple_backend.FooEngine') self.fail() except ImportError as e: self.assertEqual(str(e), "The Python module 'haystack.backends.simple_backend' has no 'FooEngine' class.") django-haystack-2.8.0/test_haystack/test_discovery.py000066400000000000000000000042331325051407000230550ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from test_haystack.discovery.search_indexes import FooIndex from haystack import connections from haystack.utils.loading import UnifiedIndex EXPECTED_INDEX_MODEL_COUNT = 6 class ManualDiscoveryTestCase(TestCase): def test_discovery(self): old_ui = connections['default'].get_unified_index() connections['default']._index = UnifiedIndex() ui = connections['default'].get_unified_index() self.assertEqual(len(ui.get_indexed_models()), EXPECTED_INDEX_MODEL_COUNT) ui.build(indexes=[FooIndex()]) self.assertListEqual(['discovery.foo'], [str(i._meta) for i in ui.get_indexed_models()]) ui.build(indexes=[]) self.assertListEqual([], ui.get_indexed_models()) connections['default']._index = old_ui class AutomaticDiscoveryTestCase(TestCase): def test_discovery(self): old_ui = 
connections['default'].get_unified_index() connections['default']._index = UnifiedIndex() ui = connections['default'].get_unified_index() self.assertEqual(len(ui.get_indexed_models()), EXPECTED_INDEX_MODEL_COUNT) # Test exclusions. ui.excluded_indexes = ['test_haystack.discovery.search_indexes.BarIndex'] ui.build() indexed_model_names = [str(i._meta) for i in ui.get_indexed_models()] self.assertIn('multipleindex.foo', indexed_model_names) self.assertIn('multipleindex.bar', indexed_model_names) self.assertNotIn('discovery.bar', indexed_model_names) ui.excluded_indexes = ['test_haystack.discovery.search_indexes.BarIndex', 'test_haystack.discovery.search_indexes.FooIndex'] ui.build() indexed_model_names = [str(i._meta) for i in ui.get_indexed_models()] self.assertIn('multipleindex.foo', indexed_model_names) self.assertIn('multipleindex.bar', indexed_model_names) self.assertListEqual([], [i for i in indexed_model_names if i.startswith('discovery')]) connections['default']._index = old_ui django-haystack-2.8.0/test_haystack/test_fields.py000066400000000000000000000524441325051407000223230ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime from decimal import Decimal from mock import Mock from django.template import TemplateDoesNotExist from django.test import TestCase from test_haystack.core.models import MockModel, MockTag, ManyToManyLeftSideModel, ManyToManyRightSideModel, \ OneToManyLeftSideModel, OneToManyRightSideModel from haystack.fields import * class SearchFieldTestCase(TestCase): def test_get_iterable_objects_with_none(self): self.assertEqual([], SearchField.get_iterable_objects(None)) def test_get_iterable_objects_with_single_non_iterable_object(self): obj = object() expected = [obj] self.assertEqual(expected, SearchField.get_iterable_objects(obj)) def test_get_iterable_objects_with_list_stays_the_same(self): objects = [object(), object()] self.assertIs(objects, 
SearchField.get_iterable_objects(objects)) def test_get_iterable_objects_with_django_manytomany_rel(self): left_model = ManyToManyLeftSideModel.objects.create() right_model_1 = ManyToManyRightSideModel.objects.create(name='Right side 1') right_model_2 = ManyToManyRightSideModel.objects.create() left_model.related_models.add(right_model_1) left_model.related_models.add(right_model_2) result = SearchField.get_iterable_objects(left_model.related_models) self.assertTrue(right_model_1 in result) self.assertTrue(right_model_2 in result) def test_get_iterable_objects_with_django_onetomany_rel(self): left_model = OneToManyLeftSideModel.objects.create() right_model_1 = OneToManyRightSideModel.objects.create(left_side=left_model) right_model_2 = OneToManyRightSideModel.objects.create(left_side=left_model) result = SearchField.get_iterable_objects(left_model.right_side) self.assertTrue(right_model_1 in result) self.assertTrue(right_model_2 in result) def test_resolve_attributes_lookup_with_field_that_points_to_none(self): related = Mock(spec=['none_field'], none_field=None) obj = Mock(spec=['related'], related=[related]) field = SearchField(null=False) self.assertRaises(SearchFieldError, field.resolve_attributes_lookup, [obj], ['related', 'none_field']) def test_resolve_attributes_lookup_with_field_that_points_to_none_but_is_allowed_to_be_null(self): related = Mock(spec=['none_field'], none_field=None) obj = Mock(spec=['related'], related=[related]) field = SearchField(null=True) self.assertEqual([None], field.resolve_attributes_lookup([obj], ['related', 'none_field'])) def test_resolve_attributes_lookup_with_field_that_points_to_none_but_has_default(self): related = Mock(spec=['none_field'], none_field=None) obj = Mock(spec=['related'], related=[related]) field = SearchField(default='Default value') self.assertEqual(['Default value'], field.resolve_attributes_lookup([obj], ['related', 'none_field'])) def test_resolve_attributes_lookup_with_deep_relationship(self): 
related_lvl_2 = Mock(spec=['value'], value=1) related = Mock(spec=['related'], related=[related_lvl_2, related_lvl_2]) obj = Mock(spec=['related'], related=[related]) field = SearchField() self.assertEqual([1, 1], field.resolve_attributes_lookup([obj], ['related', 'related', 'value'])) def test_prepare_with_null_django_onetomany_rel(self): left_model = OneToManyLeftSideModel.objects.create() field = SearchField(model_attr='right_side__pk', null=True) result = field.prepare(left_model) self.assertEqual(None, result) class CharFieldTestCase(TestCase): def test_init(self): try: foo = CharField(model_attr='foo') except: self.fail() def test_prepare(self): mock = MockModel() mock.user = 'daniel' author = CharField(model_attr='user') self.assertEqual(author.prepare(mock), u'daniel') # Do a lookup through the relation. mock_tag = MockTag.objects.create(name='primary') mock = MockModel() mock.tag = mock_tag tag_name = CharField(model_attr='tag__name') self.assertEqual(tag_name.prepare(mock), u'primary') # Use the default. mock = MockModel() author = CharField(model_attr='author', default='') self.assertEqual(author.prepare(mock), u'') # Simulate failed lookups. mock_tag = MockTag.objects.create(name='primary') mock = MockModel() mock.tag = mock_tag tag_slug = CharField(model_attr='tag__slug') self.assertRaises(SearchFieldError, tag_slug.prepare, mock) # Simulate default='foo'. mock = MockModel() default = CharField(default='foo') self.assertEqual(default.prepare(mock), 'foo') # Simulate null=True. 
mock = MockModel() empty = CharField(null=True) self.assertEqual(empty.prepare(mock), None) mock = MockModel() mock.user = None author = CharField(model_attr='user', null=True) self.assertEqual(author.prepare(mock), None) class NgramFieldTestCase(TestCase): def test_init(self): try: foo = NgramField(model_attr='foo') except: self.fail() self.assertRaises(SearchFieldError, NgramField, faceted=True) def test_prepare(self): mock = MockModel() mock.user = 'daniel' author = NgramField(model_attr='user') self.assertEqual(author.prepare(mock), u'daniel') # Do a lookup through the relation. mock_tag = MockTag.objects.create(name='primary') mock = MockModel() mock.tag = mock_tag tag_name = NgramField(model_attr='tag__name') self.assertEqual(tag_name.prepare(mock), u'primary') # Use the default. mock = MockModel() author = NgramField(model_attr='author', default='') self.assertEqual(author.prepare(mock), u'') # Simulate failed lookups. mock_tag = MockTag.objects.create(name='primary') mock = MockModel() mock.tag = mock_tag tag_slug = NgramField(model_attr='tag__slug') self.assertRaises(SearchFieldError, tag_slug.prepare, mock) # Simulate default='foo'. mock = MockModel() default = NgramField(default='foo') self.assertEqual(default.prepare(mock), 'foo') # Simulate null=True. mock = MockModel() empty = NgramField(null=True) self.assertEqual(empty.prepare(mock), None) mock = MockModel() mock.user = None author = NgramField(model_attr='user', null=True) self.assertEqual(author.prepare(mock), None) class EdgeNgramFieldTestCase(TestCase): def test_init(self): try: foo = EdgeNgramField(model_attr='foo') except: self.fail() self.assertRaises(SearchFieldError, EdgeNgramField, faceted=True) def test_prepare(self): mock = MockModel() mock.user = 'daniel' author = EdgeNgramField(model_attr='user') self.assertEqual(author.prepare(mock), u'daniel') # Do a lookup through the relation. 
mock_tag = MockTag.objects.create(name='primary') mock = MockModel() mock.tag = mock_tag tag_name = EdgeNgramField(model_attr='tag__name') self.assertEqual(tag_name.prepare(mock), u'primary') # Use the default. mock = MockModel() author = EdgeNgramField(model_attr='author', default='') self.assertEqual(author.prepare(mock), u'') # Simulate failed lookups. mock_tag = MockTag.objects.create(name='primary') mock = MockModel() mock.tag = mock_tag tag_slug = EdgeNgramField(model_attr='tag__slug') self.assertRaises(SearchFieldError, tag_slug.prepare, mock) # Simulate default='foo'. mock = MockModel() default = EdgeNgramField(default='foo') self.assertEqual(default.prepare(mock), 'foo') # Simulate null=True. mock = MockModel() empty = EdgeNgramField(null=True) self.assertEqual(empty.prepare(mock), None) mock = MockModel() mock.user = None author = EdgeNgramField(model_attr='user', null=True) self.assertEqual(author.prepare(mock), None) class IntegerFieldTestCase(TestCase): def test_init(self): try: foo = IntegerField(model_attr='foo') except: self.fail() def test_prepare(self): mock = MockModel() mock.pk = 1 pk = IntegerField(model_attr='pk') self.assertEqual(pk.prepare(mock), 1) # Simulate failed lookups. mock_tag = MockTag.objects.create(name='primary') mock = MockModel() mock.tag = mock_tag tag_count = IntegerField(model_attr='tag__count') self.assertRaises(SearchFieldError, tag_count.prepare, mock) # Simulate default=1. mock = MockModel() default = IntegerField(default=1) self.assertEqual(default.prepare(mock), 1) # Simulate null=True. mock = MockModel() pk_none = IntegerField(model_attr='pk', null=True) self.assertEqual(pk_none.prepare(mock), None) class FloatFieldTestCase(TestCase): def test_init(self): try: foo = FloatField(model_attr='foo') except: self.fail() def test_prepare(self): mock = MockModel() mock.floaty = 12.5 floaty = FloatField(model_attr='floaty') self.assertEqual(floaty.prepare(mock), 12.5) # Simulate default=1.5. 
mock = MockModel() default = FloatField(default=1.5) self.assertEqual(default.prepare(mock), 1.5) # Simulate null=True. mock = MockModel() floaty_none = FloatField(null=True) self.assertEqual(floaty_none.prepare(mock), None) class DecimalFieldTestCase(TestCase): def test_init(self): try: foo = DecimalField(model_attr='foo') except: self.fail() def test_prepare(self): mock = MockModel() mock.floaty = Decimal('12.5') floaty = DecimalField(model_attr='floaty') self.assertEqual(floaty.prepare(mock), '12.5') # Simulate default=1.5. mock = MockModel() default = DecimalField(default='1.5') self.assertEqual(default.prepare(mock), '1.5') # Simulate null=True. mock = MockModel() floaty_none = DecimalField(null=True) self.assertEqual(floaty_none.prepare(mock), None) class BooleanFieldTestCase(TestCase): def test_init(self): try: foo = BooleanField(model_attr='foo') except: self.fail() def test_prepare(self): mock = MockModel() mock.active = True is_active = BooleanField(model_attr='active') self.assertEqual(is_active.prepare(mock), True) # Simulate default=True. mock = MockModel() default = BooleanField(default=True) self.assertEqual(default.prepare(mock), True) # Simulate null=True. mock = MockModel() booly_none = BooleanField(null=True) self.assertEqual(booly_none.prepare(mock), None) class DateFieldTestCase(TestCase): def test_init(self): try: foo = DateField(model_attr='foo') except: self.fail() def test_convert(self): pub_date = DateField() self.assertEqual(pub_date.convert('2016-02-16'), datetime.date(2016, 2, 16)) def test_prepare(self): mock = MockModel() mock.pub_date = datetime.date(2009, 2, 13) pub_date = DateField(model_attr='pub_date') self.assertEqual(pub_date.prepare(mock), datetime.date(2009, 2, 13)) # Simulate default=datetime.date(2000, 1, 1). 
mock = MockModel() default = DateField(default=datetime.date(2000, 1, 1)) self.assertEqual(default.prepare(mock), datetime.date(2000, 1, 1)) def test_prepare_from_string(self): mock = MockModel() mock.pub_date = datetime.date(2016, 2, 16) pub_date = DateField(model_attr='pub_date') self.assertEqual(pub_date.prepare(mock), datetime.date(2016, 2, 16)) class DateTimeFieldTestCase(TestCase): def test_init(self): try: foo = DateTimeField(model_attr='foo') except: self.fail() def test_convert(self): pub_date = DateTimeField() self.assertEqual(pub_date.convert('2016-02-16T10:02:03'), datetime.datetime(2016, 2, 16, 10, 2, 3)) def test_prepare(self): mock = MockModel() mock.pub_date = datetime.datetime(2009, 2, 13, 10, 1, 0) pub_date = DateTimeField(model_attr='pub_date') self.assertEqual(pub_date.prepare(mock), datetime.datetime(2009, 2, 13, 10, 1, 0)) # Simulate default=datetime.datetime(2009, 2, 13, 10, 01, 00). mock = MockModel() default = DateTimeField(default=datetime.datetime(2000, 1, 1, 0, 0, 0)) self.assertEqual(default.prepare(mock), datetime.datetime(2000, 1, 1, 0, 0, 0)) def test_prepare_from_string(self): mock = MockModel() mock.pub_date = '2016-02-16T10:01:02Z' pub_date = DateTimeField(model_attr='pub_date') self.assertEqual(pub_date.prepare(mock), datetime.datetime(2016, 2, 16, 10, 1, 2)) class MultiValueFieldTestCase(TestCase): def test_init(self): try: foo = MultiValueField(model_attr='foo') except: self.fail() self.assertRaises(SearchFieldError, MultiValueField, use_template=True) def test_prepare(self): mock = MockModel() mock.sites = ['3', '4', '5'] sites = MultiValueField(model_attr='sites') self.assertEqual(sites.prepare(mock), ['3', '4', '5']) # Simulate default=[1]. mock = MockModel() default = MultiValueField(default=[1]) self.assertEqual(default.prepare(mock), [1]) # Simulate null=True. 
mock = MockModel() multy_none = MultiValueField(null=True) self.assertEqual(multy_none.prepare(mock), None) def test_convert_with_single_string(self): field = MultiValueField() self.assertEqual(['String'], field.convert('String')) def test_convert_with_single_int(self): field = MultiValueField() self.assertEqual([1], field.convert(1)) def test_convert_with_list_of_strings(self): field = MultiValueField() self.assertEqual(['String 1', 'String 2'], field.convert(['String 1', 'String 2'])) def test_convert_with_list_of_ints(self): field = MultiValueField() self.assertEqual([1, 2, 3], field.convert([1, 2, 3])) class CharFieldWithTemplateTestCase(TestCase): def test_init(self): try: foo = CharField(use_template=True) except: self.fail() try: foo = CharField(use_template=True, template_name='foo.txt') except: self.fail() foo = CharField(use_template=True, template_name='foo.txt') self.assertEqual(foo.template_name, 'foo.txt') # Test the select_template usage. foo = CharField(use_template=True, template_name=['bar.txt', 'foo.txt']) self.assertEqual(foo.template_name, ['bar.txt', 'foo.txt']) def test_prepare(self): mock = MockModel() mock.pk = 1 mock.user = 'daniel' template1 = CharField(use_template=True) self.assertRaises(SearchFieldError, template1.prepare, mock) template2 = CharField(use_template=True) template2.instance_name = 'template_x' self.assertRaises(TemplateDoesNotExist, template2.prepare, mock) template3 = CharField(use_template=True) template3.instance_name = 'template' self.assertEqual(template3.prepare(mock), u'Indexed!\n1') template4 = CharField(use_template=True, template_name='search/indexes/foo.txt') template4.instance_name = 'template' self.assertEqual(template4.prepare(mock), u'FOO!\n') template5 = CharField(use_template=True, template_name=['foo.txt', 'search/indexes/bar.txt']) template5.instance_name = 'template' self.assertEqual(template5.prepare(mock), u'BAR!\n') ############################################################################## # The 
following tests look like they don't do much, but it's important because # we need to verify that the faceted variants behave like the field they # emulate. The old-broke behavior was convert everything to string. ############################################################################## class FacetFieldTestCase(TestCase): def test_init(self): # You shouldn't use the FacetField itself. try: foo = FacetField(model_attr='foo') self.fail() except: pass try: foo_exact = FacetField(facet_for='bar') self.fail() except: pass class FacetCharFieldTestCase(TestCase): def test_init(self): try: foo = FacetCharField(model_attr='foo') foo_exact = FacetCharField(facet_for='bar') except: self.fail() self.assertEqual(foo.facet_for, None) self.assertEqual(foo_exact.null, True) self.assertEqual(foo_exact.facet_for, 'bar') def test_prepare(self): mock = MockModel() mock.user = 'daniel' author = FacetCharField(model_attr='user') self.assertEqual(author.prepare(mock), u'daniel') class FacetIntegerFieldTestCase(TestCase): def test_init(self): try: foo = FacetIntegerField(model_attr='foo') foo_exact = FacetIntegerField(facet_for='bar') except: self.fail() self.assertEqual(foo.facet_for, None) self.assertEqual(foo_exact.null, True) self.assertEqual(foo_exact.facet_for, 'bar') def test_prepare(self): mock = MockModel() mock.user = 'daniel' mock.view_count = 13 view_count = FacetIntegerField(model_attr='view_count') self.assertEqual(view_count.prepare(mock), 13) class FacetFloatFieldTestCase(TestCase): def test_init(self): try: foo = FacetFloatField(model_attr='foo') foo_exact = FacetFloatField(facet_for='bar') except: self.fail() self.assertEqual(foo.facet_for, None) self.assertEqual(foo_exact.null, True) self.assertEqual(foo_exact.facet_for, 'bar') def test_prepare(self): mock = MockModel() mock.user = 'daniel' mock.price = 25.65 price = FacetFloatField(model_attr='price') self.assertEqual(price.prepare(mock), 25.65) class FacetBooleanFieldTestCase(TestCase): def test_init(self): try: 
foo = FacetBooleanField(model_attr='foo') foo_exact = FacetBooleanField(facet_for='bar') except: self.fail() self.assertEqual(foo.facet_for, None) self.assertEqual(foo_exact.null, True) self.assertEqual(foo_exact.facet_for, 'bar') def test_prepare(self): mock = MockModel() mock.user = 'daniel' mock.is_active = True is_active = FacetBooleanField(model_attr='is_active') self.assertEqual(is_active.prepare(mock), True) class FacetDateFieldTestCase(TestCase): def test_init(self): try: foo = FacetDateField(model_attr='foo') foo_exact = FacetDateField(facet_for='bar') except: self.fail() self.assertEqual(foo.facet_for, None) self.assertEqual(foo_exact.null, True) self.assertEqual(foo_exact.facet_for, 'bar') def test_prepare(self): mock = MockModel() mock.user = 'daniel' mock.created = datetime.date(2010, 10, 30) created = FacetDateField(model_attr='created') self.assertEqual(created.prepare(mock), datetime.date(2010, 10, 30)) class FacetDateTimeFieldTestCase(TestCase): def test_init(self): try: foo = FacetDateTimeField(model_attr='foo') foo_exact = FacetDateTimeField(facet_for='bar') except: self.fail() self.assertEqual(foo.facet_for, None) self.assertEqual(foo_exact.null, True) self.assertEqual(foo_exact.facet_for, 'bar') def test_prepare(self): mock = MockModel() mock.user = 'daniel' mock.created = datetime.datetime(2010, 10, 30, 3, 14, 25) created = FacetDateTimeField(model_attr='created') self.assertEqual(created.prepare(mock), datetime.datetime(2010, 10, 30, 3, 14, 25)) class FacetMultiValueFieldTestCase(TestCase): def test_init(self): try: foo = FacetMultiValueField(model_attr='foo') foo_exact = FacetMultiValueField(facet_for='bar') except: self.fail() self.assertEqual(foo.facet_for, None) self.assertEqual(foo_exact.null, True) self.assertEqual(foo_exact.facet_for, 'bar') def test_prepare(self): mock = MockModel() mock.user = 'daniel' mock.sites = [1, 3, 4] sites = FacetMultiValueField(model_attr='sites') self.assertEqual(sites.prepare(mock), [1, 3, 4]) 
django-haystack-2.8.0/test_haystack/test_forms.py000066400000000000000000000141541325051407000221770ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from test_haystack.core.models import AnotherMockModel, MockModel from test_haystack.test_views import BasicAnotherMockModelSearchIndex, BasicMockModelSearchIndex from haystack import connection_router, connections from haystack.forms import FacetedSearchForm, model_choices, ModelSearchForm, SearchForm from haystack.query import EmptySearchQuerySet, SearchQuerySet from haystack.utils.loading import UnifiedIndex class SearchFormTestCase(TestCase): def setUp(self): super(SearchFormTestCase, self).setUp() # Stow. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.bammsi = BasicAnotherMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi, self.bammsi]) connections['default']._index = self.ui # Update the "index". backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) self.sqs = SearchQuerySet() def tearDown(self): connections['default']._index = self.old_unified_index super(SearchFormTestCase, self).tearDown() def test_unbound(self): sf = SearchForm({}, searchqueryset=self.sqs) self.assertEqual(sf.errors, {}) self.assertEqual(sf.is_valid(), True) # This shouldn't blow up. sqs = sf.search() self.assertTrue(isinstance(sqs, EmptySearchQuerySet)) class ModelSearchFormTestCase(TestCase): def setUp(self): super(ModelSearchFormTestCase, self).setUp() # Stow. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.bammsi = BasicAnotherMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi, self.bammsi]) connections['default']._index = self.ui # Update the "index". 
backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) self.sqs = SearchQuerySet() def tearDown(self): connections['default']._index = self.old_unified_index super(ModelSearchFormTestCase, self).tearDown() def test_models_regression_1(self): # Regression for issue #1. msf = ModelSearchForm({ 'query': 'test', 'models': ['core.mockmodel', 'core.anothermockmodel'], }, searchqueryset=self.sqs) self.assertEqual(msf.fields['models'].choices, [('core.anothermockmodel', u'Another mock models'), ('core.mockmodel', u'Mock models')]) self.assertEqual(msf.errors, {}) self.assertEqual(msf.is_valid(), True) sqs_with_models = msf.search() self.assertEqual(len(sqs_with_models.query.models), 2) def test_model_choices(self): self.assertEqual(len(model_choices()), 2) self.assertEqual([option[1] for option in model_choices()], [u'Another mock models', u'Mock models']) def test_model_choices_unicode(self): stowed_verbose_name_plural = MockModel._meta.verbose_name_plural MockModel._meta.verbose_name_plural = u'☃' self.assertEqual(len(model_choices()), 2) self.assertEqual([option[1] for option in model_choices()], [u'Another mock models', u'☃']) MockModel._meta.verbose_name_plural = stowed_verbose_name_plural class FacetedSearchFormTestCase(TestCase): def setUp(self): super(FacetedSearchFormTestCase, self).setUp() # Stow. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.bammsi = BasicAnotherMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi, self.bammsi]) connections['default']._index = self.ui # Update the "index". 
backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) self.sqs = SearchQuerySet() def tearDown(self): connections['default']._index = self.old_unified_index super(FacetedSearchFormTestCase, self).tearDown() def test_init_with_selected_facets(self): sf = FacetedSearchForm({}, searchqueryset=self.sqs) self.assertEqual(sf.errors, {}) self.assertEqual(sf.is_valid(), True) self.assertEqual(sf.selected_facets, []) sf = FacetedSearchForm({}, selected_facets=[], searchqueryset=self.sqs) self.assertEqual(sf.errors, {}) self.assertEqual(sf.is_valid(), True) self.assertEqual(sf.selected_facets, []) sf = FacetedSearchForm({}, selected_facets=['author:daniel'], searchqueryset=self.sqs) self.assertEqual(sf.errors, {}) self.assertEqual(sf.is_valid(), True) self.assertEqual(sf.selected_facets, ['author:daniel']) sf = FacetedSearchForm({}, selected_facets=['author:daniel', 'author:chris'], searchqueryset=self.sqs) self.assertEqual(sf.errors, {}) self.assertEqual(sf.is_valid(), True) self.assertEqual(sf.selected_facets, ['author:daniel', 'author:chris']) def test_search(self): sf = FacetedSearchForm({'q': 'test'}, selected_facets=[], searchqueryset=self.sqs) sqs = sf.search() self.assertEqual(sqs.query.narrow_queries, set()) # Test the "skip no-colon" bits. 
sf = FacetedSearchForm({'q': 'test'}, selected_facets=['authordaniel'], searchqueryset=self.sqs) sqs = sf.search() self.assertEqual(sqs.query.narrow_queries, set()) sf = FacetedSearchForm({'q': 'test'}, selected_facets=['author:daniel'], searchqueryset=self.sqs) sqs = sf.search() self.assertEqual(sqs.query.narrow_queries, set([u'author:"daniel"'])) sf = FacetedSearchForm({'q': 'test'}, selected_facets=['author:daniel', 'author:chris'], searchqueryset=self.sqs) sqs = sf.search() self.assertEqual(sqs.query.narrow_queries, set([u'author:"daniel"', u'author:"chris"'])) django-haystack-2.8.0/test_haystack/test_generic_views.py000066400000000000000000000051701325051407000237000ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.test.client import RequestFactory from django.test.testcases import TestCase from haystack.forms import ModelSearchForm from haystack.generic_views import SearchView class GenericSearchViewsTestCase(TestCase): """Test case for the generic search views.""" def setUp(self): super(GenericSearchViewsTestCase, self).setUp() self.query = 'haystack' self.request = self.get_request( url='/some/random/url?q={0}'.format(self.query) ) def test_get_form_kwargs(self): """Test getting the search view form kwargs.""" v = SearchView() v.request = self.request form_kwargs = v.get_form_kwargs() self.assertEqual(form_kwargs.get('data').get('q'), self.query) self.assertEqual(form_kwargs.get('initial'), {}) self.assertTrue('searchqueryset' in form_kwargs) self.assertTrue('load_all' in form_kwargs) def test_search_view_response(self): """Test the generic SearchView response.""" response = SearchView.as_view()(request=self.request) context = response.context_data self.assertEqual(context['query'], self.query) self.assertEqual(context.get('view').__class__, SearchView) self.assertEqual(context.get('form').__class__, ModelSearchForm) self.assertIn('page_obj', context) 
self.assertNotIn('page', context) def test_search_view_form_valid(self): """Test the generic SearchView form is valid.""" v = SearchView() v.kwargs = {} v.request = self.request form = v.get_form(v.get_form_class()) response = v.form_valid(form) context = response.context_data self.assertEqual(context['query'], self.query) def test_search_view_form_invalid(self): """Test the generic SearchView form is invalid.""" v = SearchView() v.kwargs = {} v.request = self.request form = v.get_form(v.get_form_class()) response = v.form_invalid(form) context = response.context_data self.assertTrue('query' not in context) def get_request(self, url, method='get', data=None, **kwargs): """Gets the request object for the view. :param url: a mock url to use for the request :param method: the http method to use for the request ('get', 'post', etc). """ factory = RequestFactory() factory_func = getattr(factory, method) request = factory_func(url, data=data or {}, **kwargs) return request django-haystack-2.8.0/test_haystack/test_indexes.py000066400000000000000000000671421325051407000225150ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime import time from threading import Thread from django.test import TestCase from django.utils.six.moves import queue from test_haystack.core.models import (AFifthMockModel, AThirdMockModel, ManyToManyLeftSideModel, ManyToManyRightSideModel, MockModel) from haystack import connection_router, connections, indexes from haystack.exceptions import SearchFieldError from haystack.utils.loading import UnifiedIndex class BadSearchIndex1(indexes.SearchIndex, indexes.Indexable): author = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class BadSearchIndex2(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) content2 = 
indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class GoodMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') extra = indexes.CharField(indexed=False, use_template=True) def get_model(self): return MockModel # For testing inheritance... class AltGoodMockSearchIndex(GoodMockSearchIndex, indexes.Indexable): additional = indexes.CharField(model_attr='author') def get_model(self): return MockModel class GoodCustomMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='author', faceted=True) pub_date = indexes.DateTimeField(model_attr='pub_date', faceted=True) extra = indexes.CharField(indexed=False, use_template=True) hello = indexes.CharField(model_attr='hello') def prepare(self, obj): super(GoodCustomMockSearchIndex, self).prepare(obj) self.prepared_data['whee'] = 'Custom preparation.' 
return self.prepared_data def prepare_author(self, obj): return "Hi, I'm %s" % self.prepared_data['author'] def load_all_queryset(self): return self.get_model()._default_manager.filter(id__gt=1) def get_model(self): return MockModel def index_queryset(self, using=None): return MockModel.objects.all() def read_queryset(self, using=None): return MockModel.objects.filter(author__in=['daniel1', 'daniel3']) def build_queryset(self, start_date=None, end_date=None): return MockModel.objects.filter(author__in=['daniel1', 'daniel3']) class GoodNullableMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='author', null=True, faceted=True) def get_model(self): return MockModel class GoodOverriddenFieldNameMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True, index_fieldname='more_content') author = indexes.CharField(model_attr='author', index_fieldname='name_s') hello = indexes.CharField(model_attr='hello') def get_model(self): return MockModel class GoodFacetedMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) author = indexes.CharField(model_attr='author') author_foo = indexes.FacetCharField(facet_for='author') pub_date = indexes.DateTimeField(model_attr='pub_date') pub_date_exact = indexes.FacetDateTimeField(facet_for='pub_date') def get_model(self): return MockModel def prepare_author(self, obj): return "Hi, I'm %s" % self.prepared_data['author'] def prepare_pub_date_exact(self, obj): return "2010-10-26T01:54:32" class MROFieldsSearchIndexA(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, model_attr='test_a') def get_model(self): return MockModel class MROFieldsSearchIndexB(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, model_attr='test_b') def get_model(self): return MockModel class 
MROFieldsSearchChild(MROFieldsSearchIndexA, MROFieldsSearchIndexB): pass class ModelWithManyToManyFieldAndAttributeLookupSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) related_models = indexes.MultiValueField(model_attr='related_models__name') def get_model(self): return ManyToManyLeftSideModel class SearchIndexTestCase(TestCase): fixtures = ['base_data'] def setUp(self): super(SearchIndexTestCase, self).setUp() self.sb = connections['default'].get_backend() self.mi = GoodMockSearchIndex() self.cmi = GoodCustomMockSearchIndex() self.cnmi = GoodNullableMockSearchIndex() self.gfmsi = GoodFacetedMockSearchIndex() # Fake the unified index. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.ui.build(indexes=[self.mi]) connections['default']._index = self.ui self.sample_docs = { u'core.mockmodel.1': { 'text': u'Indexed!\n1', 'django_id': u'1', 'django_ct': u'core.mockmodel', 'extra': u'Stored!\n1', 'author': u'daniel1', 'pub_date': datetime.datetime(2009, 3, 17, 6, 0), 'id': u'core.mockmodel.1' }, u'core.mockmodel.2': { 'text': u'Indexed!\n2', 'django_id': u'2', 'django_ct': u'core.mockmodel', 'extra': u'Stored!\n2', 'author': u'daniel2', 'pub_date': datetime.datetime(2009, 3, 17, 7, 0), 'id': u'core.mockmodel.2' }, u'core.mockmodel.3': { 'text': u'Indexed!\n3', 'django_id': u'3', 'django_ct': u'core.mockmodel', 'extra': u'Stored!\n3', 'author': u'daniel3', 'pub_date': datetime.datetime(2009, 3, 17, 8, 0), 'id': u'core.mockmodel.3' } } def tearDown(self): connections['default']._index = self.old_unified_index super(SearchIndexTestCase, self).tearDown() def test_no_contentfield_present(self): self.assertRaises(SearchFieldError, BadSearchIndex1) def test_too_many_contentfields_present(self): self.assertRaises(SearchFieldError, BadSearchIndex2) def test_contentfield_present(self): try: mi = GoodMockSearchIndex() except: self.fail() def test_proper_fields(self): 
self.assertEqual(len(self.mi.fields), 4) self.assertTrue('text' in self.mi.fields) self.assertTrue(isinstance(self.mi.fields['text'], indexes.CharField)) self.assertTrue('author' in self.mi.fields) self.assertTrue(isinstance(self.mi.fields['author'], indexes.CharField)) self.assertTrue('pub_date' in self.mi.fields) self.assertTrue(isinstance(self.mi.fields['pub_date'], indexes.DateTimeField)) self.assertTrue('extra' in self.mi.fields) self.assertTrue(isinstance(self.mi.fields['extra'], indexes.CharField)) self.assertEqual(len(self.cmi.fields), 7) self.assertTrue('text' in self.cmi.fields) self.assertTrue(isinstance(self.cmi.fields['text'], indexes.CharField)) self.assertTrue('author' in self.cmi.fields) self.assertTrue(isinstance(self.cmi.fields['author'], indexes.CharField)) self.assertTrue('author_exact' in self.cmi.fields) self.assertTrue(isinstance(self.cmi.fields['author_exact'], indexes.FacetCharField)) self.assertTrue('pub_date' in self.cmi.fields) self.assertTrue(isinstance(self.cmi.fields['pub_date'], indexes.DateTimeField)) self.assertTrue('pub_date_exact' in self.cmi.fields) self.assertTrue(isinstance(self.cmi.fields['pub_date_exact'], indexes.FacetDateTimeField)) self.assertTrue('extra' in self.cmi.fields) self.assertTrue(isinstance(self.cmi.fields['extra'], indexes.CharField)) self.assertTrue('hello' in self.cmi.fields) self.assertTrue(isinstance(self.cmi.fields['extra'], indexes.CharField)) def test_index_queryset(self): self.assertEqual(len(self.cmi.index_queryset()), 3) def test_read_queryset(self): self.assertEqual(len(self.cmi.read_queryset()), 2) def test_build_queryset(self): # The custom SearchIndex.build_queryset returns the same records as # the read_queryset self.assertEqual(len(self.cmi.build_queryset()), 2) # Store a reference to the original method old_guf = self.mi.__class__.get_updated_field self.mi.__class__.get_updated_field = lambda self: 'pub_date' # With an updated field, we should get have filtered results sd = 
datetime.datetime(2009, 3, 17, 7, 0) self.assertEqual(len(self.mi.build_queryset(start_date=sd)), 2) ed = datetime.datetime(2009, 3, 17, 7, 59) self.assertEqual(len(self.mi.build_queryset(end_date=ed)), 2) sd = datetime.datetime(2009, 3, 17, 6, 0) ed = datetime.datetime(2009, 3, 17, 6, 59) self.assertEqual(len(self.mi.build_queryset(start_date=sd, end_date=ed)), 1) # Remove the updated field for the next test del self.mi.__class__.get_updated_field # The default should return all 3 even if we specify a start date # because there is no updated field specified self.assertEqual(len(self.mi.build_queryset(start_date=sd)), 3) # Restore the original attribute self.mi.__class__.get_updated_field = old_guf def test_prepare(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) self.assertEqual(len(self.mi.prepare(mock)), 7) self.assertEqual(sorted(self.mi.prepare(mock).keys()), ['author', 'django_ct', 'django_id', 'extra', 'id', 'pub_date', 'text']) def test_custom_prepare(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) self.assertEqual(len(self.cmi.prepare(mock)), 11) self.assertEqual(sorted(self.cmi.prepare(mock).keys()), ['author', 'author_exact', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'text', 'whee']) self.assertEqual(len(self.cmi.full_prepare(mock)), 11) self.assertEqual(sorted(self.cmi.full_prepare(mock).keys()), ['author', 'author_exact', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'text', 'whee']) def test_thread_safety(self): # This is a regression. ``SearchIndex`` used to write to # ``self.prepared_data``, which would leak between threads if things # went too fast. 
exceptions = [] def threaded_prepare(index_queue, index, model): try: index.queue = index_queue prepped = index.prepare(model) except Exception as e: exceptions.append(e) raise class ThreadedSearchIndex(GoodMockSearchIndex): def prepare_author(self, obj): if obj.pk == 20: time.sleep(0.1) else: time.sleep(0.5) index_queue.put(self.prepared_data['author']) return self.prepared_data['author'] tmi = ThreadedSearchIndex() index_queue = queue.Queue() mock_1 = MockModel() mock_1.pk = 20 mock_1.author = 'foo' mock_1.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) mock_2 = MockModel() mock_2.pk = 21 mock_2.author = 'daniel%s' % mock_2.id mock_2.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) th1 = Thread(target=threaded_prepare, args=(index_queue, tmi, mock_1)) th2 = Thread(target=threaded_prepare, args=(index_queue, tmi, mock_2)) th1.start() th2.start() th1.join() th2.join() mock_1_result = index_queue.get() mock_2_result = index_queue.get() self.assertEqual(mock_1_result, u'foo') self.assertEqual(mock_2_result, u'daniel21') def test_custom_prepare_author(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) self.assertEqual(len(self.cmi.prepare(mock)), 11) self.assertEqual(sorted(self.cmi.prepare(mock).keys()), ['author', 'author_exact', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'text', 'whee']) self.assertEqual(len(self.cmi.full_prepare(mock)), 11) self.assertEqual(sorted(self.cmi.full_prepare(mock).keys()), ['author', 'author_exact', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'text', 'whee']) self.assertEqual(self.cmi.prepared_data['author'], "Hi, I'm daniel20") self.assertEqual(self.cmi.prepared_data['author_exact'], "Hi, I'm daniel20") def test_custom_model_attr(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) 
self.assertEqual(len(self.cmi.prepare(mock)), 11) self.assertEqual(sorted(self.cmi.prepare(mock).keys()), ['author', 'author_exact', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'text', 'whee']) self.assertEqual(len(self.cmi.full_prepare(mock)), 11) self.assertEqual(sorted(self.cmi.full_prepare(mock).keys()), ['author', 'author_exact', 'django_ct', 'django_id', 'extra', 'hello', 'id', 'pub_date', 'pub_date_exact', 'text', 'whee']) self.assertEqual(self.cmi.prepared_data['hello'], u'World!') def test_custom_index_fieldname(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) cofnmi = GoodOverriddenFieldNameMockSearchIndex() self.assertEqual(len(cofnmi.prepare(mock)), 6) self.assertEqual(sorted(cofnmi.prepare(mock).keys()), ['django_ct', 'django_id', 'hello', 'id', 'more_content', 'name_s']) self.assertEqual(cofnmi.prepared_data['name_s'], u'daniel20') self.assertEqual(cofnmi.get_content_field(), 'more_content') def test_get_content_field(self): self.assertEqual(self.mi.get_content_field(), 'text') def test_update(self): self.sb.clear() self.assertEqual(self.sb.search('*')['hits'], 0) self.mi.update() self.assertEqual(self.sb.search('*')['hits'], 3) self.sb.clear() def test_update_object(self): self.sb.clear() self.assertEqual(self.sb.search('*')['hits'], 0) mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) self.mi.update_object(mock) self.assertEqual([(res.content_type(), res.pk) for res in self.sb.search('*')['results']], [(u'core.mockmodel', u'20')]) self.sb.clear() def test_remove_object(self): self.mi.update() self.assertEqual(self.sb.search('*')['hits'], 3) mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) self.mi.update_object(mock) self.assertEqual(self.sb.search('*')['hits'], 4) 
self.mi.remove_object(mock) self.assertEqual([(res.content_type(), res.pk) for res in self.sb.search('*')['results']], [(u'core.mockmodel', u'1'), (u'core.mockmodel', u'2'), (u'core.mockmodel', u'3')]) # Put it back so we can test passing kwargs. mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) self.mi.update_object(mock) self.assertEqual(self.sb.search('*')['hits'], 4) self.mi.remove_object(mock, commit=False) self.assertEqual([(res.content_type(), res.pk) for res in self.sb.search('*')['results']], [(u'core.mockmodel', u'1'), (u'core.mockmodel', u'2'), (u'core.mockmodel', u'3'), (u'core.mockmodel', u'20')]) self.sb.clear() def test_clear(self): self.mi.update() self.assertGreater(self.sb.search('*')['hits'], 0) self.mi.clear() self.assertEqual(self.sb.search('*')['hits'], 0) def test_reindex(self): self.mi.reindex() self.assertEqual([(res.content_type(), res.pk) for res in self.sb.search('*')['results']], [(u'core.mockmodel', u'1'), (u'core.mockmodel', u'2'), (u'core.mockmodel', u'3')]) self.sb.clear() def test_inheritance(self): try: agmi = AltGoodMockSearchIndex() except: self.fail() self.assertEqual(len(agmi.fields), 5) self.assertTrue('text' in agmi.fields) self.assertTrue(isinstance(agmi.fields['text'], indexes.CharField)) self.assertTrue('author' in agmi.fields) self.assertTrue(isinstance(agmi.fields['author'], indexes.CharField)) self.assertTrue('pub_date' in agmi.fields) self.assertTrue(isinstance(agmi.fields['pub_date'], indexes.DateTimeField)) self.assertTrue('extra' in agmi.fields) self.assertTrue(isinstance(agmi.fields['extra'], indexes.CharField)) self.assertTrue('additional' in agmi.fields) self.assertTrue(isinstance(agmi.fields['additional'], indexes.CharField)) def test_proper_field_resolution(self): mrofsc = MROFieldsSearchChild() mock = MockModel() mock.pk = 20 mock.author = 'daniel%s' % mock.id mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) mock.test_a = 'This 
is A' mock.test_b = 'This is B' self.assertEqual(len(mrofsc.fields), 1) prepped_data = mrofsc.prepare(mock) self.assertEqual(len(prepped_data), 4) self.assertEqual(prepped_data['text'], 'This is A') def test_load_all_queryset(self): self.assertEqual([obj.id for obj in self.cmi.load_all_queryset()], [2, 3]) def test_nullable(self): mock = MockModel() mock.pk = 20 mock.author = None mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) prepared_data = self.cnmi.prepare(mock) self.assertEqual(len(prepared_data), 6) self.assertEqual(sorted(prepared_data.keys()), ['author', 'author_exact', 'django_ct', 'django_id', 'id', 'text']) prepared_data = self.cnmi.full_prepare(mock) self.assertEqual(len(prepared_data), 4) self.assertEqual(sorted(prepared_data.keys()), ['django_ct', 'django_id', 'id', 'text']) def test_custom_facet_fields(self): mock = MockModel() mock.pk = 20 mock.author = 'daniel' mock.pub_date = datetime.datetime(2009, 1, 31, 4, 19, 0) prepared_data = self.gfmsi.prepare(mock) self.assertEqual(len(prepared_data), 8) self.assertEqual(sorted(prepared_data.keys()), ['author', 'author_foo', 'django_ct', 'django_id', 'id', 'pub_date', 'pub_date_exact', 'text']) prepared_data = self.gfmsi.full_prepare(mock) self.assertEqual(len(prepared_data), 8) self.assertEqual(sorted(prepared_data.keys()), ['author', 'author_foo', 'django_ct', 'django_id', 'id', 'pub_date', 'pub_date_exact', 'text']) self.assertEqual(prepared_data['author_foo'], u"Hi, I'm daniel") self.assertEqual(prepared_data['pub_date_exact'], '2010-10-26T01:54:32') class BasicModelSearchIndex(indexes.ModelSearchIndex, indexes.Indexable): class Meta: model = MockModel class FieldsModelSearchIndex(indexes.ModelSearchIndex, indexes.Indexable): class Meta: model = MockModel fields = ['author', 'pub_date'] class ExcludesModelSearchIndex(indexes.ModelSearchIndex, indexes.Indexable): class Meta: model = MockModel excludes = ['author', 'foo'] class FieldsWithOverrideModelSearchIndex(indexes.ModelSearchIndex, 
indexes.Indexable): foo = indexes.IntegerField(model_attr='foo') class Meta: model = MockModel fields = ['author', 'foo'] def get_index_fieldname(self, f): if f.name == 'author': return 'author_bar' else: return f.name class YetAnotherBasicModelSearchIndex(indexes.ModelSearchIndex, indexes.Indexable): text = indexes.CharField(document=True) class Meta: model = AThirdMockModel class GhettoAFifthMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) def get_model(self): return AFifthMockModel def index_queryset(self, using=None): # Index everything, return self.get_model().objects.complete_set() def read_queryset(self, using=None): return self.get_model().objects.all() class ReadQuerySetTestSearchIndex(indexes.SearchIndex, indexes.Indexable): author = indexes.CharField(model_attr='author', document=True) def get_model(self): return AFifthMockModel def read_queryset(self, using=None): return self.get_model().objects.complete_set() class TextReadQuerySetTestSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='author', document=True) def get_model(self): return AFifthMockModel def read_queryset(self, using=None): return self.get_model().objects.complete_set() class ModelWithManyToManyFieldModelSearchIndex(indexes.ModelSearchIndex): def get_model(self): return ManyToManyLeftSideModel class ModelSearchIndexTestCase(TestCase): def setUp(self): super(ModelSearchIndexTestCase, self).setUp() self.sb = connections['default'].get_backend() self.bmsi = BasicModelSearchIndex() self.fmsi = FieldsModelSearchIndex() self.emsi = ExcludesModelSearchIndex() self.fwomsi = FieldsWithOverrideModelSearchIndex() self.yabmsi = YetAnotherBasicModelSearchIndex() self.m2mmsi = ModelWithManyToManyFieldModelSearchIndex() def test_basic(self): self.assertEqual(len(self.bmsi.fields), 4) self.assertTrue('foo' in self.bmsi.fields) self.assertTrue(isinstance(self.bmsi.fields['foo'], indexes.CharField)) 
self.assertEqual(self.bmsi.fields['foo'].null, False) self.assertEqual(self.bmsi.fields['foo'].index_fieldname, 'foo') self.assertTrue('author' in self.bmsi.fields) self.assertTrue(isinstance(self.bmsi.fields['author'], indexes.CharField)) self.assertEqual(self.bmsi.fields['author'].null, False) self.assertTrue('pub_date' in self.bmsi.fields) self.assertTrue(isinstance(self.bmsi.fields['pub_date'], indexes.DateTimeField)) self.assertTrue(isinstance(self.bmsi.fields['pub_date'].default, datetime.datetime)) self.assertTrue('text' in self.bmsi.fields) self.assertTrue(isinstance(self.bmsi.fields['text'], indexes.CharField)) self.assertEqual(self.bmsi.fields['text'].document, True) self.assertEqual(self.bmsi.fields['text'].use_template, True) def test_fields(self): self.assertEqual(len(self.fmsi.fields), 3) self.assertTrue('author' in self.fmsi.fields) self.assertTrue(isinstance(self.fmsi.fields['author'], indexes.CharField)) self.assertTrue('pub_date' in self.fmsi.fields) self.assertTrue(isinstance(self.fmsi.fields['pub_date'], indexes.DateTimeField)) self.assertTrue('text' in self.fmsi.fields) self.assertTrue(isinstance(self.fmsi.fields['text'], indexes.CharField)) def test_excludes(self): self.assertEqual(len(self.emsi.fields), 2) self.assertTrue('pub_date' in self.emsi.fields) self.assertTrue(isinstance(self.emsi.fields['pub_date'], indexes.DateTimeField)) self.assertTrue('text' in self.emsi.fields) self.assertTrue(isinstance(self.emsi.fields['text'], indexes.CharField)) self.assertNotIn('related_models', self.m2mmsi.fields) def test_fields_with_override(self): self.assertEqual(len(self.fwomsi.fields), 3) self.assertTrue('author' in self.fwomsi.fields) self.assertTrue(isinstance(self.fwomsi.fields['author'], indexes.CharField)) self.assertTrue('foo' in self.fwomsi.fields) self.assertTrue(isinstance(self.fwomsi.fields['foo'], indexes.IntegerField)) self.assertTrue('text' in self.fwomsi.fields) self.assertTrue(isinstance(self.fwomsi.fields['text'], indexes.CharField)) 
def test_overriding_field_name_with_get_index_fieldname(self): self.assertTrue(self.fwomsi.fields['foo'].index_fieldname, 'foo') self.assertTrue(self.fwomsi.fields['author'].index_fieldname, 'author_bar') def test_float_integer_fields(self): self.assertEqual(len(self.yabmsi.fields), 5) self.assertEqual(sorted(self.yabmsi.fields.keys()), ['author', 'average_delay', 'pub_date', 'text', 'view_count']) self.assertTrue('author' in self.yabmsi.fields) self.assertTrue(isinstance(self.yabmsi.fields['author'], indexes.CharField)) self.assertEqual(self.yabmsi.fields['author'].null, False) self.assertTrue('pub_date' in self.yabmsi.fields) self.assertTrue(isinstance(self.yabmsi.fields['pub_date'], indexes.DateTimeField)) self.assertTrue(isinstance(self.yabmsi.fields['pub_date'].default, datetime.datetime)) self.assertTrue('text' in self.yabmsi.fields) self.assertTrue(isinstance(self.yabmsi.fields['text'], indexes.CharField)) self.assertEqual(self.yabmsi.fields['text'].document, True) self.assertEqual(self.yabmsi.fields['text'].use_template, False) self.assertTrue('view_count' in self.yabmsi.fields) self.assertTrue(isinstance(self.yabmsi.fields['view_count'], indexes.IntegerField)) self.assertEqual(self.yabmsi.fields['view_count'].null, False) self.assertEqual(self.yabmsi.fields['view_count'].index_fieldname, 'view_count') self.assertTrue('average_delay' in self.yabmsi.fields) self.assertTrue(isinstance(self.yabmsi.fields['average_delay'], indexes.FloatField)) self.assertEqual(self.yabmsi.fields['average_delay'].null, False) self.assertEqual(self.yabmsi.fields['average_delay'].index_fieldname, 'average_delay') class ModelWithManyToManyFieldAndAttributeLookupSearchIndexTestCase(TestCase): def test_full_prepare(self): index = ModelWithManyToManyFieldAndAttributeLookupSearchIndex() left_model = ManyToManyLeftSideModel.objects.create() right_model_1 = ManyToManyRightSideModel.objects.create(name='Right side 1') right_model_2 = ManyToManyRightSideModel.objects.create() 
left_model.related_models.add(right_model_1) left_model.related_models.add(right_model_2) result = index.full_prepare(left_model) self.assertDictEqual( result, { 'django_ct': 'core.manytomanyleftsidemodel', 'django_id': '1', 'text': None, 'id': 'core.manytomanyleftsidemodel.1', 'related_models': ['Right side 1', 'Default name'], } ) django-haystack-2.8.0/test_haystack/test_inputs.py000066400000000000000000000070521325051407000223720ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from haystack import connections, inputs class InputTestCase(TestCase): def setUp(self): super(InputTestCase, self).setUp() self.query_obj = connections['default'].get_query() def test_raw_init(self): raw = inputs.Raw('hello OR there, :you') self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {}) self.assertEqual(raw.post_process, False) raw = inputs.Raw('hello OR there, :you', test='really') self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {'test': 'really'}) self.assertEqual(raw.post_process, False) def test_raw_prepare(self): raw = inputs.Raw('hello OR there, :you') self.assertEqual(raw.prepare(self.query_obj), 'hello OR there, :you') def test_clean_init(self): clean = inputs.Clean('hello OR there, :you') self.assertEqual(clean.query_string, 'hello OR there, :you') self.assertEqual(clean.post_process, True) def test_clean_prepare(self): clean = inputs.Clean('hello OR there, :you') self.assertEqual(clean.prepare(self.query_obj), 'hello OR there, :you') def test_exact_init(self): exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.query_string, 'hello OR there, :you') self.assertEqual(exact.post_process, True) def test_exact_prepare(self): exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"') # Incorrect, but the backend 
doesn't implement much of anything useful. exact = inputs.Exact('hello OR there, :you', clean=True) self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"') def test_not_init(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.query_string, 'hello OR there, :you') self.assertEqual(not_it.post_process, True) def test_not_prepare(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.prepare(self.query_obj), u'NOT (hello OR there, :you)') def test_autoquery_init(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.query_string, 'panic -don\'t "froody dude"') self.assertEqual(autoquery.post_process, False) def test_autoquery_prepare(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.prepare(self.query_obj), u'panic NOT don\'t "froody dude"') def test_altparser_init(self): altparser = inputs.AltParser('dismax') self.assertEqual(altparser.parser_name, 'dismax') self.assertEqual(altparser.query_string, '') self.assertEqual(altparser.kwargs, {}) self.assertEqual(altparser.post_process, False) altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) self.assertEqual(altparser.parser_name, 'dismax') self.assertEqual(altparser.query_string, 'douglas adams') self.assertEqual(altparser.kwargs, {'mm': 1, 'qf': 'author'}) self.assertEqual(altparser.post_process, False) def test_altparser_prepare(self): altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) # Not supported on that backend. 
self.assertEqual(altparser.prepare(self.query_obj), '') django-haystack-2.8.0/test_haystack/test_loading.py000066400000000000000000000342151325051407000224660ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import unittest from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.test import TestCase, override_settings from test_haystack.core.models import AnotherMockModel, MockModel from haystack import indexes from haystack.exceptions import NotHandled, SearchFieldError from haystack.utils import loading try: import pysolr except ImportError: pysolr = False class ConnectionHandlerTestCase(TestCase): def test_init(self): ch = loading.ConnectionHandler({}) self.assertEqual(ch.connections_info, {}) ch = loading.ConnectionHandler({ 'default': { 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': 'http://localhost:9001/solr/test_default', }, }) self.assertEqual(ch.connections_info, { 'default': { 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': 'http://localhost:9001/solr/test_default', }, }) @unittest.skipIf(pysolr is False, "pysolr required") def test_get_item(self): ch = loading.ConnectionHandler({}) try: empty_engine = ch['default'] self.fail() except ImproperlyConfigured: pass ch = loading.ConnectionHandler({ 'default': { 'ENGINE': 'haystack.backends.solr_backend.SolrEngine', 'URL': 'http://localhost:9001/solr/test_default', }, }) solr_engine = ch['default'] backend_path, memory_address = repr(solr_engine).strip('<>').split(' object at ') self.assertEqual(backend_path, 'haystack.backends.solr_backend.SolrEngine') solr_engine_2 = ch['default'] backend_path_2, memory_address_2 = repr(solr_engine_2).strip('<>').split(' object at ') self.assertEqual(backend_path_2, 'haystack.backends.solr_backend.SolrEngine') # Ensure we're loading out of the memorized connection. 
self.assertEqual(memory_address_2, memory_address) try: empty_engine = ch['slave'] self.fail() except ImproperlyConfigured: pass def test_get_unified_index(self): ch = loading.ConnectionHandler({ 'default': { 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine', } }) ui = ch['default'].get_unified_index() klass, address = repr(ui).strip('<>').split(' object at ') self.assertEqual(str(klass), 'haystack.utils.loading.UnifiedIndex') ui_2 = ch['default'].get_unified_index() klass_2, address_2 = repr(ui_2).strip('<>').split(' object at ') self.assertEqual(str(klass_2), 'haystack.utils.loading.UnifiedIndex') self.assertEqual(address_2, address) class ConnectionRouterTestCase(TestCase): @override_settings() def test_init(self): del settings.HAYSTACK_ROUTERS cr = loading.ConnectionRouter() self.assertEqual([str(route.__class__) for route in cr.routers], [""]) @override_settings(HAYSTACK_ROUTERS=['haystack.routers.DefaultRouter']) def test_router_override1(self): cr = loading.ConnectionRouter() self.assertEqual([str(route.__class__) for route in cr.routers], [""]) @override_settings(HAYSTACK_ROUTERS=[]) def test_router_override2(self): cr = loading.ConnectionRouter() self.assertEqual([str(route.__class__) for route in cr.routers], [""]) @override_settings(HAYSTACK_ROUTERS=['test_haystack.mocks.MockMasterSlaveRouter', 'haystack.routers.DefaultRouter']) def test_router_override3(self): cr = loading.ConnectionRouter() self.assertEqual([str(route.__class__) for route in cr.routers], ["", ""]) @override_settings() def test_actions1(self): del settings.HAYSTACK_ROUTERS cr = loading.ConnectionRouter() self.assertEqual(cr.for_read(), 'default') self.assertEqual(cr.for_write(), ['default']) @override_settings(HAYSTACK_ROUTERS=['test_haystack.mocks.MockMasterSlaveRouter', 'haystack.routers.DefaultRouter']) def test_actions2(self): cr = loading.ConnectionRouter() self.assertEqual(cr.for_read(), 'slave') self.assertEqual(cr.for_write(), ['master', 'default']) 
@override_settings(HAYSTACK_ROUTERS=['test_haystack.mocks.MockPassthroughRouter', 'test_haystack.mocks.MockMasterSlaveRouter', 'haystack.routers.DefaultRouter']) def test_actions3(self): cr = loading.ConnectionRouter() # Demonstrate pass-through self.assertEqual(cr.for_read(), 'slave') self.assertEqual(cr.for_write(), ['master', 'default']) # Demonstrate that hinting can change routing. self.assertEqual(cr.for_read(pass_through=False), 'pass') self.assertEqual(cr.for_write(pass_through=False), ['pass', 'master', 'default']) @override_settings(HAYSTACK_ROUTERS=['test_haystack.mocks.MockMultiRouter', 'haystack.routers.DefaultRouter']) def test_actions4(self): cr = loading.ConnectionRouter() # Demonstrate that a router can return multiple backends in the "for_write" method self.assertEqual(cr.for_read(), 'default') self.assertEqual(cr.for_write(), ['multi1', 'multi2', 'default']) class MockNotAModel(object): pass class FakeSearchIndex(indexes.BasicSearchIndex, indexes.Indexable): def update_object(self, instance, **kwargs): # Incorrect behavior but easy to test and all we care about is that we # make it here. We rely on the `SearchIndex` tests to ensure correct # behavior. return True def remove_object(self, instance, **kwargs): # Incorrect behavior but easy to test and all we care about is that we # make it here. We rely on the `SearchIndex` tests to ensure correct # behavior. 
return True def get_model(self): return MockModel class InvalidSearchIndex(indexes.SearchIndex, indexes.Indexable): document = indexes.CharField(document=True) def get_model(self): return MockModel class BasicMockModelSearchIndex(indexes.BasicSearchIndex, indexes.Indexable): def get_model(self): return MockModel class BasicAnotherMockModelSearchIndex(indexes.BasicSearchIndex, indexes.Indexable): def get_model(self): return AnotherMockModel class ValidSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) author = indexes.CharField(index_fieldname='name') title = indexes.CharField(indexed=False) def get_model(self): return MockModel class AlternateValidSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) author = indexes.CharField(faceted=True) title = indexes.CharField(faceted=True) def get_model(self): return AnotherMockModel class ExplicitFacetSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) author = indexes.CharField(faceted=True) title = indexes.CharField() title_facet = indexes.FacetCharField(facet_for='title') bare_facet = indexes.FacetCharField() def get_model(self): return MockModel class MultiValueValidSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) author = indexes.MultiValueField(stored=False) title = indexes.CharField(indexed=False) def get_model(self): return MockModel class UnifiedIndexTestCase(TestCase): def setUp(self): super(UnifiedIndexTestCase, self).setUp() self.ui = loading.UnifiedIndex() self.ui.build([]) def test_get_index(self): self.assertRaises(NotHandled, self.ui.get_index, MockModel) try: self.ui.get_index(MockModel) except NotHandled as e: self.assertTrue(MockModel.__name__ in str(e)) self.ui.build(indexes=[BasicMockModelSearchIndex()]) self.assertTrue(isinstance(self.ui.get_index(MockModel), indexes.BasicSearchIndex)) def test_get_indexed_models(self): 
self.assertEqual(self.ui.get_indexed_models(), []) self.ui.build(indexes=[ValidSearchIndex()]) indexed_models = self.ui.get_indexed_models() self.assertEqual(len(indexed_models), 1) self.assertTrue(MockModel in indexed_models) def test_get_indexes(self): self.assertEqual(self.ui.get_indexes(), {}) index = ValidSearchIndex() self.ui.build(indexes=[index]) results = self.ui.get_indexes() self.assertEqual(len(results), 1) self.assertTrue(MockModel in results) self.assertEqual(results[MockModel], index) def test_all_searchfields(self): self.ui.build(indexes=[BasicMockModelSearchIndex()]) fields = self.ui.all_searchfields() self.assertEqual(len(fields), 1) self.assertTrue('text' in fields) self.assertTrue(isinstance(fields['text'], indexes.CharField)) self.assertEqual(fields['text'].document, True) self.assertEqual(fields['text'].use_template, True) self.ui.build(indexes=[BasicMockModelSearchIndex(), AlternateValidSearchIndex()]) fields = self.ui.all_searchfields() self.assertEqual(len(fields), 5) self.assertEqual(sorted(fields.keys()), ['author', 'author_exact', 'text', 'title', 'title_exact']) self.assertTrue('text' in fields) self.assertTrue(isinstance(fields['text'], indexes.CharField)) self.assertEqual(fields['text'].document, True) self.assertEqual(fields['text'].use_template, True) self.assertTrue('title' in fields) self.assertTrue(isinstance(fields['title'], indexes.CharField)) self.assertEqual(fields['title'].document, False) self.assertEqual(fields['title'].use_template, False) self.assertEqual(fields['title'].faceted, True) self.assertEqual(fields['title'].indexed, True) self.assertTrue('author' in fields) self.assertTrue(isinstance(fields['author'], indexes.CharField)) self.assertEqual(fields['author'].document, False) self.assertEqual(fields['author'].use_template, False) self.assertEqual(fields['author'].faceted, True) self.assertEqual(fields['author'].stored, True) self.assertEqual(fields['author'].index_fieldname, 'author') 
self.ui.build(indexes=[AlternateValidSearchIndex(), MultiValueValidSearchIndex()]) fields = self.ui.all_searchfields() self.assertEqual(len(fields), 5) self.assertEqual(sorted(fields.keys()), ['author', 'author_exact', 'text', 'title', 'title_exact']) self.assertTrue('text' in fields) self.assertTrue(isinstance(fields['text'], indexes.CharField)) self.assertEqual(fields['text'].document, True) self.assertEqual(fields['text'].use_template, False) self.assertTrue('title' in fields) self.assertTrue(isinstance(fields['title'], indexes.CharField)) self.assertEqual(fields['title'].document, False) self.assertEqual(fields['title'].use_template, False) self.assertEqual(fields['title'].faceted, True) self.assertEqual(fields['title'].indexed, True) self.assertTrue('author' in fields) self.assertTrue(isinstance(fields['author'], indexes.MultiValueField)) self.assertEqual(fields['author'].document, False) self.assertEqual(fields['author'].use_template, False) self.assertEqual(fields['author'].stored, True) self.assertEqual(fields['author'].faceted, True) self.assertEqual(fields['author'].index_fieldname, 'author') try: self.ui.build(indexes=[AlternateValidSearchIndex(), InvalidSearchIndex()]) self.fail() except SearchFieldError: pass def test_get_index_fieldname(self): self.assertEqual(self.ui._fieldnames, {}) self.ui.build(indexes=[ValidSearchIndex(), BasicAnotherMockModelSearchIndex()]) self.ui.get_index_fieldname('text') self.assertEqual(self.ui._fieldnames, {'text': 'text', 'title': 'title', 'author': 'name'}) self.assertEqual(self.ui.get_index_fieldname('text'), 'text') self.assertEqual(self.ui.get_index_fieldname('author'), 'name') self.assertEqual(self.ui.get_index_fieldname('title'), 'title') # Reset the internal state to test the invalid case. 
self.ui.reset() self.assertEqual(self.ui._fieldnames, {}) try: self.ui.build(indexes=[ValidSearchIndex(), AlternateValidSearchIndex()]) self.fail() except SearchFieldError: pass def test_basic_get_facet_field_name(self): self.assertEqual(self.ui._facet_fieldnames, {}) self.ui.build(indexes=[BasicMockModelSearchIndex(), AlternateValidSearchIndex()]) self.ui.get_facet_fieldname('text') self.assertEqual(self.ui._facet_fieldnames, {'title': 'title_exact', 'author': 'author_exact'}) self.assertEqual(self.ui.get_index_fieldname('text'), 'text') self.assertEqual(self.ui.get_index_fieldname('author'), 'author') self.assertEqual(self.ui.get_index_fieldname('title'), 'title') self.assertEqual(self.ui.get_facet_fieldname('text'), 'text') self.assertEqual(self.ui.get_facet_fieldname('author'), 'author_exact') self.assertEqual(self.ui.get_facet_fieldname('title'), 'title_exact') def test_more_advanced_get_facet_field_name(self): self.assertEqual(self.ui._facet_fieldnames, {}) self.ui.build(indexes=[BasicAnotherMockModelSearchIndex(), ExplicitFacetSearchIndex()]) self.ui.get_facet_fieldname('text') self.assertEqual(self.ui._facet_fieldnames, {'bare_facet': 'bare_facet', 'title': 'title_facet', 'author': 'author_exact'}) self.assertEqual(self.ui.get_facet_fieldname('title'), 'title_facet') self.assertEqual(self.ui.get_facet_fieldname('bare_facet'), 'bare_facet') django-haystack-2.8.0/test_haystack/test_management_commands.py000066400000000000000000000121021325051407000250350ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.conf import settings from django.core.management import call_command from django.test import TestCase from mock import call, patch __all__ = ['CoreManagementCommandsTestCase'] class CoreManagementCommandsTestCase(TestCase): @patch("haystack.management.commands.update_index.Command.update_backend") def test_update_index_default_using(self, m): """update_index uses default 
index when --using is not present""" call_command('update_index') for k in settings.HAYSTACK_CONNECTIONS: self.assertTrue(call('core', k) in m.call_args_list) @patch("haystack.management.commands.update_index.Command.update_backend") def test_update_index_using(self, m): """update_index only applies to indexes specified with --using""" call_command('update_index', verbosity=0, using=["eng", "fra"]) m.assert_any_call("core", "eng") m.assert_any_call("core", "fra") self.assertTrue(call("core", "default") not in m.call_args_list, "update_index should have been restricted to the index specified with --using") @patch("haystack.loading.ConnectionHandler.__getitem__") def test_clear_index_default_using(self, m): """clear_index uses all keys when --using is not present""" call_command('clear_index', verbosity=0, interactive=False) self.assertEqual(len(settings.HAYSTACK_CONNECTIONS), m.call_count) for k in settings.HAYSTACK_CONNECTIONS: self.assertTrue(call(k) in m.call_args_list) @patch("haystack.loading.ConnectionHandler.__getitem__") def test_clear_index_using(self, m): """clear_index only applies to indexes specified with --using""" call_command('clear_index', verbosity=0, interactive=False, using=["eng"]) m.assert_called_with("eng") self.assertTrue(m.return_value.get_backend.called, "backend.clear() should be called") self.assertTrue(call("default") not in m.call_args_list, "clear_index should have been restricted to the index specified with --using") @patch("haystack.loading.ConnectionHandler.__getitem__") @patch("haystack.management.commands.update_index.Command.update_backend") def test_rebuild_index_default_using(self, m1, m2): """rebuild_index uses default index when --using is not present""" call_command('rebuild_index', verbosity=0, interactive=False) self.assertEqual(len(settings.HAYSTACK_CONNECTIONS), m2.call_count) for k in settings.HAYSTACK_CONNECTIONS: self.assertTrue(call(k) in m2.call_args_list) m1.assert_any_call("core", "default") 
m1.assert_any_call("core", "whoosh") @patch("haystack.loading.ConnectionHandler.__getitem__") @patch("haystack.management.commands.update_index.Command.update_backend") def test_rebuild_index_using(self, m1, m2): """rebuild_index passes --using to clear_index and update_index""" call_command('rebuild_index', verbosity=0, interactive=False, using=["eng"]) m2.assert_called_with("eng") m1.assert_any_call("core", "eng") @patch('haystack.management.commands.update_index.Command.handle', return_value='') @patch('haystack.management.commands.clear_index.Command.handle', return_value='') def test_rebuild_index(self, mock_handle_clear, mock_handle_update): call_command('rebuild_index', interactive=False) self.assertTrue(mock_handle_clear.called) self.assertTrue(mock_handle_update.called) @patch('haystack.management.commands.update_index.Command.handle') @patch('haystack.management.commands.clear_index.Command.handle') def test_rebuild_index_nocommit(self, *mocks): call_command('rebuild_index', interactive=False, commit=False) for m in mocks: self.assertEqual(m.call_count, 1) args, kwargs = m.call_args self.assertIn('commit', kwargs) self.assertEqual(False, kwargs['commit']) @patch('haystack.management.commands.clear_index.Command.handle', return_value='') @patch('haystack.management.commands.update_index.Command.handle', return_value='') def test_rebuild_index_nocommit(self, update_mock, clear_mock): """ Confirm that command-line option parsing produces the same results as using call_command() directly, mostly as a sanity check for the logic in rebuild_index which combines the option_lists for its component commands. 
""" from haystack.management.commands.rebuild_index import Command Command().run_from_argv(['django-admin.py', 'rebuild_index', '--noinput', '--nocommit']) for m in (clear_mock, update_mock): self.assertEqual(m.call_count, 1) args, kwargs = m.call_args self.assertIn('commit', kwargs) self.assertEqual(False, kwargs['commit']) args, kwargs = clear_mock.call_args self.assertIn('interactive', kwargs) self.assertIs(kwargs['interactive'], False) django-haystack-2.8.0/test_haystack/test_managers.py000066400000000000000000000216631325051407000226510ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime from django.test import TestCase from test_haystack.core.models import MockModel from haystack import connections from haystack.manager import SearchIndexManager from haystack.models import SearchResult from haystack.query import EmptySearchQuerySet, SearchQuerySet, ValuesListSearchQuerySet, ValuesSearchQuerySet from haystack.utils.geo import D, Point from .mocks import CharPKMockSearchBackend from .test_views import BasicAnotherMockModelSearchIndex, BasicMockModelSearchIndex class CustomManager(SearchIndexManager): def filter(self, *args, **kwargs): return self.get_search_queryset().filter(content='foo1').filter(*args, **kwargs) class CustomMockModelIndexWithObjectsManager(BasicMockModelSearchIndex): objects = CustomManager() class CustomMockModelIndexWithAnotherManager(BasicMockModelSearchIndex): another = CustomManager() class ManagerTestCase(TestCase): fixtures = ['bulk_data.json'] def setUp(self): super(ManagerTestCase, self).setUp() self.search_index = BasicMockModelSearchIndex # Update the "index". 
backend = connections['default'].get_backend() backend.clear() backend.update(self.search_index(), MockModel.objects.all()) ui = connections['default'].get_unified_index() ui.build([BasicMockModelSearchIndex(), BasicAnotherMockModelSearchIndex()]) self.search_queryset = BasicMockModelSearchIndex.objects.all() def test_queryset(self): self.assertTrue(isinstance(self.search_queryset, SearchQuerySet)) def test_none(self): self.assertTrue(isinstance(self.search_index.objects.none(), EmptySearchQuerySet)) def test_filter(self): sqs = self.search_index.objects.filter(content='foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 1) def test_exclude(self): sqs = self.search_index.objects.exclude(content='foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 1) def test_filter_and(self): sqs = self.search_index.objects.filter_and(content='foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(sqs.query.query_filter.connector, 'AND') def test_filter_or(self): sqs = self.search_index.objects.filter_or(content='foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(sqs.query.query_filter.connector, 'OR') def test_order_by(self): sqs = self.search_index.objects.order_by('foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertTrue('foo' in sqs.query.order_by) def test_order_by_distance(self): p = Point(1.23, 4.56) sqs = self.search_index.objects.distance('location', p).order_by('distance') self.assertTrue(isinstance(sqs, SearchQuerySet)) params = sqs.query.build_params() self.assertIn('distance_point', params) self.assertDictEqual(params['distance_point'], {'field': 'location', 'point': p}) self.assertTupleEqual(params['distance_point']['point'].coords, (1.23, 4.56)) self.assertListEqual(params['sort_by'], ['distance']) def test_highlight(self): sqs = self.search_index.objects.highlight() self.assertEqual(sqs.query.highlight, True) def 
test_boost(self): sqs = self.search_index.objects.boost('foo', 10) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.boost.keys()), 1) def test_facets(self): sqs = self.search_index.objects.facet('foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.facets), 1) def test_within(self): # This is a meaningless query but we're just confirming that the manager updates the parameters here: p1 = Point(-90, -90) p2 = Point(90, 90) sqs = self.search_index.objects.within('location', p1, p2) self.assertTrue(isinstance(sqs, SearchQuerySet)) params = sqs.query.build_params() self.assertIn('within', params) self.assertDictEqual(params['within'], {'field': 'location', 'point_1': p1, 'point_2': p2}) def test_dwithin(self): p = Point(0, 0) distance = D(mi=500) sqs = self.search_index.objects.dwithin('location', p, distance) self.assertTrue(isinstance(sqs, SearchQuerySet)) params = sqs.query.build_params() self.assertIn('dwithin', params) self.assertDictEqual(params['dwithin'], {'field': 'location', 'point': p, 'distance': distance}) def test_distance(self): p = Point(0, 0) sqs = self.search_index.objects.distance('location', p) self.assertTrue(isinstance(sqs, SearchQuerySet)) params = sqs.query.build_params() self.assertIn('distance_point', params) self.assertDictEqual(params['distance_point'], {'field': 'location', 'point': p}) def test_date_facets(self): sqs = self.search_index.objects.date_facet('foo', start_date=datetime.date(2008, 2, 25), end_date=datetime.date(2009, 2, 25), gap_by='month') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.date_facets), 1) def test_query_facets(self): sqs = self.search_index.objects.query_facet('foo', '[bar TO *]') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_facets), 1) def test_narrow(self): sqs = self.search_index.objects.narrow("content:foo") self.assertTrue(isinstance(sqs, SearchQuerySet)) 
self.assertSetEqual(set(['content:foo']), sqs.query.narrow_queries) def test_raw_search(self): self.assertEqual(len(self.search_index.objects.raw_search('foo')), 23) def test_load_all(self): # Models with character primary keys. sqs = self.search_index.objects.all() sqs.query.backend = CharPKMockSearchBackend('charpk') results = sqs.load_all().all() self.assertEqual(len(results._result_cache), 0) def test_auto_query(self): sqs = self.search_index.objects.auto_query('test search -stuff') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') # With keyword argument sqs = self.search_index.objects.auto_query('test search -stuff', fieldname='title') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), "") def test_autocomplete(self): # Not implemented pass def test_count(self): self.assertEqual(SearchQuerySet().count(), 23) self.assertEqual(self.search_index.objects.count(), 23) def test_best_match(self): self.assertTrue(isinstance(self.search_index.objects.best_match(), SearchResult)) def test_latest(self): self.assertTrue(isinstance(self.search_index.objects.latest('pub_date'), SearchResult)) def test_more_like_this(self): mock = MockModel() mock.id = 1 self.assertEqual(len(self.search_index.objects.more_like_this(mock)), 23) def test_facet_counts(self): self.assertEqual(self.search_index.objects.facet_counts(), {}) def spelling_suggestion(self): # Test the case where spelling support is disabled. 
sqs = self.search_index.objects.filter(content='Indx') self.assertEqual(sqs.spelling_suggestion(), None) self.assertEqual(sqs.spelling_suggestion(preferred_query=None), None) def test_values(self): sqs = self.search_index.objects.auto_query("test").values("id") self.assert_(isinstance(sqs, ValuesSearchQuerySet)) def test_valueslist(self): sqs = self.search_index.objects.auto_query("test").values_list("id") self.assert_(isinstance(sqs, ValuesListSearchQuerySet)) class CustomManagerTestCase(TestCase): fixtures = ['bulk_data.json'] def setUp(self): super(CustomManagerTestCase, self).setUp() self.search_index_1 = CustomMockModelIndexWithObjectsManager self.search_index_2 = CustomMockModelIndexWithAnotherManager def test_filter_object_manager(self): sqs = self.search_index_1.objects.filter(content='foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) def test_filter_another_manager(self): sqs = self.search_index_2.another.filter(content='foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) django-haystack-2.8.0/test_haystack/test_models.py000066400000000000000000000201501325051407000223250ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import logging as std_logging import pickle from django.test import TestCase from test_haystack.core.models import MockModel from haystack import connections from haystack.models import SearchResult from haystack.utils import log as logging from haystack.utils.loading import UnifiedIndex from .mocks import MockSearchResult from .test_indexes import ReadQuerySetTestSearchIndex class CaptureHandler(std_logging.Handler): logs_seen = [] def emit(self, record): CaptureHandler.logs_seen.append(record) class SearchResultTestCase(TestCase): fixtures = ['base_data'] def setUp(self): super(SearchResultTestCase, self).setUp() cap = CaptureHandler() 
logging.getLogger('haystack').addHandler(cap) self.no_data = {} self.extra_data = { 'stored': 'I am stored data. How fun.', } self.no_overwrite_data = { 'django_id': 2, 'django_ct': 'haystack.anothermockmodel', 'stored': 'I am stored data. How fun.', } # The str(1) bit might seem unnecessary but it avoids test_unicode needing to handle # the differences between repr() output on Python 2 and 3 for a unicode literal: self.no_data_sr = MockSearchResult('haystack', 'mockmodel', str(1), 2) self.extra_data_sr = MockSearchResult('haystack', 'mockmodel', str(1), 3, **self.extra_data) self.no_overwrite_data_sr = MockSearchResult('haystack', 'mockmodel', str(1), 4, **self.no_overwrite_data) def test_init(self): self.assertEqual(self.no_data_sr.app_label, 'haystack') self.assertEqual(self.no_data_sr.model_name, 'mockmodel') self.assertEqual(self.no_data_sr.model, MockModel) self.assertEqual(self.no_data_sr.verbose_name, u'Mock model') self.assertEqual(self.no_data_sr.verbose_name_plural, u'Mock models') self.assertEqual(self.no_data_sr.pk, '1') self.assertEqual(self.no_data_sr.score, 2) self.assertEqual(self.no_data_sr.stored, None) self.assertEqual(self.extra_data_sr.app_label, 'haystack') self.assertEqual(self.extra_data_sr.model_name, 'mockmodel') self.assertEqual(self.extra_data_sr.model, MockModel) self.assertEqual(self.extra_data_sr.verbose_name, u'Mock model') self.assertEqual(self.extra_data_sr.verbose_name_plural, u'Mock models') self.assertEqual(self.extra_data_sr.pk, '1') self.assertEqual(self.extra_data_sr.score, 3) self.assertEqual(self.extra_data_sr.stored, 'I am stored data. 
How fun.') self.assertEqual(self.no_overwrite_data_sr.app_label, 'haystack') self.assertEqual(self.no_overwrite_data_sr.model_name, 'mockmodel') self.assertEqual(self.no_overwrite_data_sr.model, MockModel) self.assertEqual(self.no_overwrite_data_sr.verbose_name, u'Mock model') self.assertEqual(self.no_overwrite_data_sr.verbose_name_plural, u'Mock models') self.assertEqual(self.no_overwrite_data_sr.pk, '1') self.assertEqual(self.no_overwrite_data_sr.score, 4) self.assertEqual(self.no_overwrite_data_sr.stored, 'I am stored data. How fun.') def test_get_additional_fields(self): self.assertEqual(self.no_data_sr.get_additional_fields(), {}) self.assertEqual(self.extra_data_sr.get_additional_fields(), {'stored': 'I am stored data. How fun.'}) self.assertEqual(self.no_overwrite_data_sr.get_additional_fields(), {'django_ct': 'haystack.anothermockmodel', 'django_id': 2, 'stored': 'I am stored data. How fun.'}) def test_unicode(self): self.assertEqual(self.no_data_sr.__unicode__(), u"") self.assertEqual(self.extra_data_sr.__unicode__(), u"") self.assertEqual(self.no_overwrite_data_sr.__unicode__(), u"") def test_content_type(self): self.assertEqual(self.no_data_sr.content_type(), u'core.mockmodel') self.assertEqual(self.extra_data_sr.content_type(), u'core.mockmodel') self.assertEqual(self.no_overwrite_data_sr.content_type(), u'core.mockmodel') def test_stored_fields(self): # Stow. old_unified_index = connections['default']._index ui = UnifiedIndex() ui.build(indexes=[]) connections['default']._index = ui # Without registering, we should receive an empty dict. self.assertEqual(self.no_data_sr.get_stored_fields(), {}) self.assertEqual(self.extra_data_sr.get_stored_fields(), {}) self.assertEqual(self.no_overwrite_data_sr.get_stored_fields(), {}) from haystack import indexes class TestSearchIndex(indexes.SearchIndex, indexes.Indexable): stored = indexes.CharField(model_attr='author', document=True) def get_model(self): return MockModel # Include the index & try again. 
ui.document_field = 'stored' ui.build(indexes=[TestSearchIndex()]) self.assertEqual(self.no_data_sr.get_stored_fields(), {'stored': None}) self.assertEqual(self.extra_data_sr.get_stored_fields(), {'stored': 'I am stored data. How fun.'}) self.assertEqual(self.no_overwrite_data_sr.get_stored_fields(), {'stored': 'I am stored data. How fun.'}) # Restore. connections['default']._index = old_unified_index def test_missing_object(self): awol1 = SearchResult('core', 'mockmodel', '1000000', 2) self.assertEqual(awol1.app_label, 'core') self.assertEqual(awol1.model_name, 'mockmodel') self.assertEqual(awol1.pk, '1000000') self.assertEqual(awol1.score, 2) awol2 = SearchResult('core', 'yetanothermockmodel', '1000000', 2) self.assertEqual(awol2.app_label, 'core') self.assertEqual(awol2.model_name, 'yetanothermockmodel') self.assertEqual(awol2.pk, '1000000') self.assertEqual(awol2.score, 2) # Failed lookups should fail gracefully. CaptureHandler.logs_seen = [] self.assertEqual(awol1.model, MockModel) self.assertEqual(awol1.object, None) self.assertEqual(awol1.verbose_name, u'Mock model') self.assertEqual(awol1.verbose_name_plural, u'Mock models') self.assertEqual(awol1.stored, None) self.assertEqual(len(CaptureHandler.logs_seen), 4) CaptureHandler.logs_seen = [] self.assertEqual(awol2.model, None) self.assertEqual(awol2.object, None) self.assertEqual(awol2.verbose_name, u'') self.assertEqual(awol2.verbose_name_plural, u'') self.assertEqual(awol2.stored, None) self.assertEqual(len(CaptureHandler.logs_seen), 12) def test_read_queryset(self): # The model is flagged deleted so not returned by the default manager. deleted1 = SearchResult('core', 'afifthmockmodel', 2, 2) self.assertEqual(deleted1.object, None) # Stow. old_unified_index = connections['default']._index ui = UnifiedIndex() ui.document_field = 'author' ui.build(indexes=[ReadQuerySetTestSearchIndex()]) connections['default']._index = ui # The soft delete manager returns the object. 
deleted2 = SearchResult('core', 'afifthmockmodel', 2, 2) self.assertNotEqual(deleted2.object, None) self.assertEqual(deleted2.object.author, 'sam2') # Restore. connections['default']._index = old_unified_index def test_pickling(self): pickle_me_1 = SearchResult('core', 'mockmodel', '1000000', 2) picklicious = pickle.dumps(pickle_me_1) pickle_me_2 = pickle.loads(picklicious) self.assertEqual(pickle_me_1.app_label, pickle_me_2.app_label) self.assertEqual(pickle_me_1.model_name, pickle_me_2.model_name) self.assertEqual(pickle_me_1.pk, pickle_me_2.pk) self.assertEqual(pickle_me_1.score, pickle_me_2.score) django-haystack-2.8.0/test_haystack/test_query.py000066400000000000000000001062101325051407000222110ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals import datetime import unittest from django.test import TestCase from django.test.utils import override_settings from test_haystack.core.models import AnotherMockModel, CharPKMockModel, MockModel, UUIDMockModel from haystack import connections, indexes, reset_search_queries from haystack.backends import SQ, BaseSearchQuery from haystack.exceptions import FacetingError from haystack.models import SearchResult from haystack.query import EmptySearchQuerySet, SearchQuerySet, ValuesListSearchQuerySet, ValuesSearchQuerySet from haystack.utils.loading import UnifiedIndex from .mocks import (MOCK_SEARCH_RESULTS, CharPKMockSearchBackend, MockSearchBackend, MockSearchQuery, ReadQuerySetMockSearchBackend, UUIDMockSearchBackend) from .test_indexes import GhettoAFifthMockModelSearchIndex, TextReadQuerySetTestSearchIndex from .test_views import BasicAnotherMockModelSearchIndex, BasicMockModelSearchIndex test_pickling = True try: import pickle except ImportError: test_pickling = False class SQTestCase(TestCase): def test_split_expression(self): sq = SQ(foo='bar') self.assertEqual(sq.split_expression('foo'), ('foo', 'content')) 
self.assertEqual(sq.split_expression('foo__exact'), ('foo', 'exact')) self.assertEqual(sq.split_expression('foo__content'), ('foo', 'content')) self.assertEqual(sq.split_expression('foo__contains'), ('foo', 'contains')) self.assertEqual(sq.split_expression('foo__lt'), ('foo', 'lt')) self.assertEqual(sq.split_expression('foo__lte'), ('foo', 'lte')) self.assertEqual(sq.split_expression('foo__gt'), ('foo', 'gt')) self.assertEqual(sq.split_expression('foo__gte'), ('foo', 'gte')) self.assertEqual(sq.split_expression('foo__in'), ('foo', 'in')) self.assertEqual(sq.split_expression('foo__startswith'), ('foo', 'startswith')) self.assertEqual(sq.split_expression('foo__endswith'), ('foo', 'endswith')) self.assertEqual(sq.split_expression('foo__range'), ('foo', 'range')) self.assertEqual(sq.split_expression('foo__fuzzy'), ('foo', 'fuzzy')) # Unrecognized filter. Fall back to exact. self.assertEqual(sq.split_expression('foo__moof'), ('foo', 'content')) def test_repr(self): self.assertEqual(repr(SQ(foo='bar')), '') self.assertEqual(repr(SQ(foo=1)), '') self.assertEqual(repr(SQ(foo=datetime.datetime(2009, 5, 12, 23, 17))), '') def test_simple_nesting(self): sq1 = SQ(foo='bar') sq2 = SQ(foo='bar') bigger_sq = SQ(sq1 & sq2) self.assertEqual(repr(bigger_sq), '') another_bigger_sq = SQ(sq1 | sq2) self.assertEqual(repr(another_bigger_sq), '') one_more_bigger_sq = SQ(sq1 & ~sq2) self.assertEqual(repr(one_more_bigger_sq), '') mega_sq = SQ(bigger_sq & SQ(another_bigger_sq | ~one_more_bigger_sq)) self.assertEqual(repr(mega_sq), '') class BaseSearchQueryTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(BaseSearchQueryTestCase, self).setUp() self.bsq = BaseSearchQuery() def test_get_count(self): self.bsq.add_filter(SQ(foo='bar')) self.assertRaises(NotImplementedError, self.bsq.get_count) def test_build_query(self): self.bsq.add_filter(SQ(foo='bar')) self.assertRaises(NotImplementedError, self.bsq.build_query) def test_add_filter(self): 
self.assertEqual(len(self.bsq.query_filter), 0) self.bsq.add_filter(SQ(foo='bar')) self.assertEqual(len(self.bsq.query_filter), 1) self.bsq.add_filter(SQ(foo__lt='10')) self.bsq.add_filter(~SQ(claris='moof')) self.bsq.add_filter(SQ(claris='moof'), use_or=True) self.assertEqual(repr(self.bsq.query_filter), '') self.bsq.add_filter(SQ(claris='moof')) self.assertEqual(repr(self.bsq.query_filter), '') self.bsq.add_filter(SQ(claris='wtf mate')) self.assertEqual(repr(self.bsq.query_filter), '') def test_add_order_by(self): self.assertEqual(len(self.bsq.order_by), 0) self.bsq.add_order_by('foo') self.assertEqual(len(self.bsq.order_by), 1) def test_clear_order_by(self): self.bsq.add_order_by('foo') self.assertEqual(len(self.bsq.order_by), 1) self.bsq.clear_order_by() self.assertEqual(len(self.bsq.order_by), 0) def test_add_model(self): self.assertEqual(len(self.bsq.models), 0) self.assertRaises(AttributeError, self.bsq.add_model, object) self.assertEqual(len(self.bsq.models), 0) self.bsq.add_model(MockModel) self.assertEqual(len(self.bsq.models), 1) self.bsq.add_model(AnotherMockModel) self.assertEqual(len(self.bsq.models), 2) def test_set_limits(self): self.assertEqual(self.bsq.start_offset, 0) self.assertEqual(self.bsq.end_offset, None) self.bsq.set_limits(10, 50) self.assertEqual(self.bsq.start_offset, 10) self.assertEqual(self.bsq.end_offset, 50) def test_clear_limits(self): self.bsq.set_limits(10, 50) self.assertEqual(self.bsq.start_offset, 10) self.assertEqual(self.bsq.end_offset, 50) self.bsq.clear_limits() self.assertEqual(self.bsq.start_offset, 0) self.assertEqual(self.bsq.end_offset, None) def test_add_boost(self): self.assertEqual(self.bsq.boost, {}) self.bsq.add_boost('foo', 10) self.assertEqual(self.bsq.boost, {'foo': 10}) def test_add_highlight(self): self.assertEqual(self.bsq.highlight, False) self.bsq.add_highlight() self.assertEqual(self.bsq.highlight, True) def test_more_like_this(self): mock = MockModel() mock.id = 1 msq = MockSearchQuery() msq.backend = 
MockSearchBackend('mlt') ui = connections['default'].get_unified_index() bmmsi = BasicMockModelSearchIndex() ui.build(indexes=[bmmsi]) bmmsi.update() msq.more_like_this(mock) self.assertEqual(msq.get_count(), 23) self.assertEqual(int(msq.get_results()[0].pk), MOCK_SEARCH_RESULTS[0].pk) def test_add_field_facet(self): self.bsq.add_field_facet('foo') self.assertEqual(self.bsq.facets, {'foo': {}}) self.bsq.add_field_facet('bar') self.assertEqual(self.bsq.facets, {'foo': {}, 'bar': {}}) def test_add_date_facet(self): self.bsq.add_date_facet('foo', start_date=datetime.date(2009, 2, 25), end_date=datetime.date(2009, 3, 25), gap_by='day') self.assertEqual(self.bsq.date_facets, {'foo': {'gap_by': 'day', 'start_date': datetime.date(2009, 2, 25), 'end_date': datetime.date(2009, 3, 25), 'gap_amount': 1}}) self.bsq.add_date_facet('bar', start_date=datetime.date(2008, 1, 1), end_date=datetime.date(2009, 12, 1), gap_by='month') self.assertEqual(self.bsq.date_facets, {'foo': {'gap_by': 'day', 'start_date': datetime.date(2009, 2, 25), 'end_date': datetime.date(2009, 3, 25), 'gap_amount': 1}, 'bar': {'gap_by': 'month', 'start_date': datetime.date(2008, 1, 1), 'end_date': datetime.date(2009, 12, 1), 'gap_amount': 1}}) def test_add_query_facet(self): self.bsq.add_query_facet('foo', 'bar') self.assertEqual(self.bsq.query_facets, [('foo', 'bar')]) self.bsq.add_query_facet('moof', 'baz') self.assertEqual(self.bsq.query_facets, [('foo', 'bar'), ('moof', 'baz')]) self.bsq.add_query_facet('foo', 'baz') self.assertEqual(self.bsq.query_facets, [('foo', 'bar'), ('moof', 'baz'), ('foo', 'baz')]) def test_add_stats(self): self.bsq.add_stats_query('foo', ['bar']) self.assertEqual(self.bsq.stats, {'foo': ['bar']}) self.bsq.add_stats_query('moof', ['bar', 'baz']) self.assertEqual(self.bsq.stats, {'foo': ['bar'], 'moof': ['bar', 'baz']}) def test_add_narrow_query(self): self.bsq.add_narrow_query('foo:bar') self.assertEqual(self.bsq.narrow_queries, set(['foo:bar'])) 
self.bsq.add_narrow_query('moof:baz') self.assertEqual(self.bsq.narrow_queries, set(['foo:bar', 'moof:baz'])) def test_set_result_class(self): # Assert that we're defaulting to ``SearchResult``. self.assertTrue(issubclass(self.bsq.result_class, SearchResult)) # Custom class. class IttyBittyResult(object): pass self.bsq.set_result_class(IttyBittyResult) self.assertTrue(issubclass(self.bsq.result_class, IttyBittyResult)) # Reset to default. self.bsq.set_result_class(None) self.assertTrue(issubclass(self.bsq.result_class, SearchResult)) def test_run(self): # Stow. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.bammsi = BasicAnotherMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi, self.bammsi]) connections['default']._index = self.ui # Update the "index". backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) msq = connections['default'].get_query() self.assertEqual(len(msq.get_results()), 23) self.assertEqual(int(msq.get_results()[0].pk), MOCK_SEARCH_RESULTS[0].pk) # Restore. 
connections['default']._index = self.old_unified_index def test_clone(self): self.bsq.add_filter(SQ(foo='bar')) self.bsq.add_filter(SQ(foo__lt='10')) self.bsq.add_filter(~SQ(claris='moof')) self.bsq.add_filter(SQ(claris='moof'), use_or=True) self.bsq.add_order_by('foo') self.bsq.add_model(MockModel) self.bsq.add_boost('foo', 2) self.bsq.add_highlight() self.bsq.add_field_facet('foo') self.bsq.add_date_facet('foo', start_date=datetime.date(2009, 1, 1), end_date=datetime.date(2009, 1, 31), gap_by='day') self.bsq.add_query_facet('foo', 'bar') self.bsq.add_stats_query('foo', 'bar') self.bsq.add_narrow_query('foo:bar') clone = self.bsq._clone() self.assertTrue(isinstance(clone, BaseSearchQuery)) self.assertEqual(len(clone.query_filter), 2) self.assertEqual(len(clone.order_by), 1) self.assertEqual(len(clone.models), 1) self.assertEqual(len(clone.boost), 1) self.assertEqual(clone.highlight, True) self.assertEqual(len(clone.facets), 1) self.assertEqual(len(clone.date_facets), 1) self.assertEqual(len(clone.query_facets), 1) self.assertEqual(len(clone.narrow_queries), 1) self.assertEqual(clone.start_offset, self.bsq.start_offset) self.assertEqual(clone.end_offset, self.bsq.end_offset) self.assertEqual(clone.backend.__class__, self.bsq.backend.__class__) def test_log_query(self): reset_search_queries() self.assertEqual(len(connections['default'].queries), 0) # Stow. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi]) connections['default']._index = self.ui # Update the "index". backend = connections['default'].get_backend() backend.clear() self.bmmsi.update() with self.settings(DEBUG=False): msq = connections['default'].get_query() self.assertEqual(len(msq.get_results()), 23) self.assertEqual(len(connections['default'].queries), 0) with self.settings(DEBUG=True): # Redefine it to clear out the cached results. 
msq2 = connections['default'].get_query() self.assertEqual(len(msq2.get_results()), 23) self.assertEqual(len(connections['default'].queries), 1) self.assertEqual(connections['default'].queries[0]['query_string'], '') msq3 = connections['default'].get_query() msq3.add_filter(SQ(foo='bar')) len(msq3.get_results()) self.assertEqual(len(connections['default'].queries), 2) self.assertEqual(connections['default'].queries[0]['query_string'], '') self.assertEqual(connections['default'].queries[1]['query_string'], '') # Restore. connections['default']._index = self.old_unified_index class CharPKMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, model_attr='key') def get_model(self): return CharPKMockModel class SimpleMockUUIDModelIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, model_attr="characteristics") def get_model(self): return UUIDMockModel @override_settings(DEBUG=True) class SearchQuerySetTestCase(TestCase): fixtures = ['base_data.json', 'bulk_data.json'] def setUp(self): super(SearchQuerySetTestCase, self).setUp() # Stow. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.cpkmmsi = CharPKMockModelSearchIndex() self.uuidmmsi = SimpleMockUUIDModelIndex() self.ui.build(indexes=[self.bmmsi, self.cpkmmsi, self.uuidmmsi]) connections['default']._index = self.ui # Update the "index". backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) self.msqs = SearchQuerySet() # Stow. reset_search_queries() def tearDown(self): # Restore. 
connections['default']._index = self.old_unified_index super(SearchQuerySetTestCase, self).tearDown() def test_len(self): self.assertEqual(len(self.msqs), 23) def test_repr(self): reset_search_queries() self.assertEqual(len(connections['default'].queries), 0) self.assertRegexpMatches(repr(self.msqs), r'^, using=None>$') def test_iter(self): reset_search_queries() self.assertEqual(len(connections['default'].queries), 0) msqs = self.msqs.all() results = [int(res.pk) for res in iter(msqs)] self.assertEqual(results, [res.pk for res in MOCK_SEARCH_RESULTS[:23]]) self.assertEqual(len(connections['default'].queries), 3) def test_slice(self): reset_search_queries() self.assertEqual(len(connections['default'].queries), 0) results = self.msqs.all() self.assertEqual([int(res.pk) for res in results[1:11]], [res.pk for res in MOCK_SEARCH_RESULTS[1:11]]) self.assertEqual(len(connections['default'].queries), 1) reset_search_queries() self.assertEqual(len(connections['default'].queries), 0) results = self.msqs.all() self.assertEqual(int(results[22].pk), MOCK_SEARCH_RESULTS[22].pk) self.assertEqual(len(connections['default'].queries), 1) def test_manual_iter(self): results = self.msqs.all() reset_search_queries() self.assertEqual(len(connections['default'].queries), 0) check = [result.pk for result in results._manual_iter()] self.assertEqual(check, [u'1', u'2', u'3', u'4', u'5', u'6', u'7', u'8', u'9', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18', u'19', u'20', u'21', u'22', u'23']) self.assertEqual(len(connections['default'].queries), 3) reset_search_queries() self.assertEqual(len(connections['default'].queries), 0) # Test to ensure we properly fill the cache, even if we get fewer # results back (not a handled model) than the hit count indicates. # This will hang indefinitely if broken. 
# CharPK testing old_ui = self.ui self.ui.build(indexes=[self.cpkmmsi]) connections['default']._index = self.ui self.cpkmmsi.update() results = self.msqs.all() loaded = [result.pk for result in results._manual_iter()] self.assertEqual(loaded, [u'sometext', u'1234']) self.assertEqual(len(connections['default'].queries), 1) #UUID testing self.ui.build(indexes=[self.uuidmmsi]) connections['default']._index = self.ui self.uuidmmsi.update() results = self.msqs.all() loaded = [result.pk for result in results._manual_iter()] self.assertEqual(loaded, [u'53554c58-7051-4350-bcc9-dad75eb248a9', u'77554c58-7051-4350-bcc9-dad75eb24888']) connections['default']._index = old_ui def test_cache_is_full(self): reset_search_queries() self.assertEqual(len(connections['default'].queries), 0) self.assertEqual(self.msqs._cache_is_full(), False) results = self.msqs.all() fire_the_iterator_and_fill_cache = list(results) self.assertEqual(23, len(fire_the_iterator_and_fill_cache)) self.assertEqual(results._cache_is_full(), True) self.assertEqual(len(connections['default'].queries), 4) def test_all(self): sqs = self.msqs.all() self.assertTrue(isinstance(sqs, SearchQuerySet)) def test_filter(self): sqs = self.msqs.filter(content='foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 1) def test_exclude(self): sqs = self.msqs.exclude(content='foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 1) def test_order_by(self): sqs = self.msqs.order_by('foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertTrue('foo' in sqs.query.order_by) def test_models(self): # Stow. 
old_unified_index = connections['default']._index ui = UnifiedIndex() bmmsi = BasicMockModelSearchIndex() bammsi = BasicAnotherMockModelSearchIndex() ui.build(indexes=[bmmsi, bammsi]) connections['default']._index = ui msqs = SearchQuerySet() sqs = msqs.all() self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.models), 0) sqs = msqs.models(MockModel) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.models), 1) sqs = msqs.models(MockModel, AnotherMockModel) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.models), 2) # This will produce a warning. ui.build(indexes=[bmmsi]) sqs = msqs.models(AnotherMockModel) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.models), 1) def test_result_class(self): sqs = self.msqs.all() self.assertTrue(issubclass(sqs.query.result_class, SearchResult)) # Custom class. class IttyBittyResult(object): pass sqs = self.msqs.result_class(IttyBittyResult) self.assertTrue(issubclass(sqs.query.result_class, IttyBittyResult)) # Reset to default. sqs = self.msqs.result_class(None) self.assertTrue(issubclass(sqs.query.result_class, SearchResult)) def test_boost(self): sqs = self.msqs.boost('foo', 10) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.boost.keys()), 1) def test_highlight(self): sqs = self.msqs.highlight() self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(sqs.query.highlight, True) def test_spelling_override(self): sqs = self.msqs.filter(content='not the spellchecking query') self.assertEqual(sqs.query.spelling_query, None) sqs = self.msqs.set_spelling_query('override') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(sqs.query.spelling_query, 'override') def test_spelling_suggestions(self): # Test the case where spelling support is disabled. 
sqs = self.msqs.filter(content='Indx') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(sqs.spelling_suggestion(), None) self.assertEqual(sqs.spelling_suggestion('indexy'), None) def test_raw_search(self): self.assertEqual(len(self.msqs.raw_search('foo')), 23) self.assertEqual(len(self.msqs.raw_search('(content__exact:hello AND content__exact:world)')), 23) def test_load_all(self): # Models with character primary keys. sqs = SearchQuerySet() sqs.query.backend = CharPKMockSearchBackend('charpk') results = sqs.load_all().all() self.assertEqual(len(results._result_cache), 0) results._fill_cache(0, 2) self.assertEqual(len([result for result in results._result_cache if result is not None]), 2) # Models with uuid primary keys. sqs = SearchQuerySet() sqs.query.backend = UUIDMockSearchBackend('uuid') results = sqs.load_all().all() self.assertEqual(len(results._result_cache), 0) results._fill_cache(0, 2) self.assertEqual(len([result for result in results._result_cache if result is not None]), 2) # If nothing is handled, you get nothing. old_ui = connections['default']._index ui = UnifiedIndex() ui.build(indexes=[]) connections['default']._index = ui sqs = self.msqs.load_all() self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs), 0) connections['default']._index = old_ui # For full tests, see the solr_backend. def test_load_all_read_queryset(self): # Stow. 
old_ui = connections['default']._index ui = UnifiedIndex() gafmmsi = GhettoAFifthMockModelSearchIndex() ui.build(indexes=[gafmmsi]) connections['default']._index = ui gafmmsi.update() sqs = SearchQuerySet() results = sqs.load_all().all() results.query.backend = ReadQuerySetMockSearchBackend('default') results._fill_cache(0, 2) # The deleted result isn't returned self.assertEqual(len([result for result in results._result_cache if result is not None]), 1) # Register a SearchIndex with a read_queryset that returns deleted items rqstsi = TextReadQuerySetTestSearchIndex() ui.build(indexes=[rqstsi]) rqstsi.update() sqs = SearchQuerySet() results = sqs.load_all().all() results.query.backend = ReadQuerySetMockSearchBackend('default') results._fill_cache(0, 2) # Both the deleted and not deleted items are returned self.assertEqual(len([result for result in results._result_cache if result is not None]), 2) # Restore. connections['default']._index = old_ui def test_auto_query(self): sqs = self.msqs.auto_query('test search -stuff') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') sqs = self.msqs.auto_query('test "my thing" search -stuff') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') sqs = self.msqs.auto_query('test "my thing" search \'moar quotes\' -stuff') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') sqs = self.msqs.auto_query('test "my thing" search \'moar quotes\' "foo -stuff') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') sqs = self.msqs.auto_query('test - stuff') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), "") # Ensure bits in exact matches get escaped properly as well. 
sqs = self.msqs.auto_query('"pants:rule"') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') # Now with a different fieldname sqs = self.msqs.auto_query('test search -stuff', fieldname='title') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), "") sqs = self.msqs.auto_query('test "my thing" search -stuff', fieldname='title') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(repr(sqs.query.query_filter), '') def test_count(self): self.assertEqual(self.msqs.count(), 23) def test_facet_counts(self): self.assertEqual(self.msqs.facet_counts(), {}) def test_best_match(self): self.assertTrue(isinstance(self.msqs.best_match(), SearchResult)) def test_latest(self): self.assertTrue(isinstance(self.msqs.latest('pub_date'), SearchResult)) def test_more_like_this(self): mock = MockModel() mock.id = 1 self.assertEqual(len(self.msqs.more_like_this(mock)), 23) def test_facets(self): sqs = self.msqs.facet('foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.facets), 1) sqs2 = self.msqs.facet('foo').facet('bar') self.assertTrue(isinstance(sqs2, SearchQuerySet)) self.assertEqual(len(sqs2.query.facets), 2) def test_date_facets(self): try: sqs = self.msqs.date_facet('foo', start_date=datetime.date(2008, 2, 25), end_date=datetime.date(2009, 2, 25), gap_by='smarblaph') self.fail() except FacetingError as e: self.assertEqual(str(e), "The gap_by ('smarblaph') must be one of the following: year, month, day, hour, minute, second.") sqs = self.msqs.date_facet('foo', start_date=datetime.date(2008, 2, 25), end_date=datetime.date(2009, 2, 25), gap_by='month') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.date_facets), 1) sqs2 = self.msqs.date_facet('foo', start_date=datetime.date(2008, 2, 25), end_date=datetime.date(2009, 2, 25), gap_by='month').date_facet('bar', start_date=datetime.date(2007, 2, 25), 
end_date=datetime.date(2009, 2, 25), gap_by='year') self.assertTrue(isinstance(sqs2, SearchQuerySet)) self.assertEqual(len(sqs2.query.date_facets), 2) def test_query_facets(self): sqs = self.msqs.query_facet('foo', '[bar TO *]') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_facets), 1) sqs2 = self.msqs.query_facet('foo', '[bar TO *]').query_facet('bar', '[100 TO 499]') self.assertTrue(isinstance(sqs2, SearchQuerySet)) self.assertEqual(len(sqs2.query.query_facets), 2) # Test multiple query facets on a single field sqs3 = self.msqs.query_facet('foo', '[bar TO *]').query_facet('bar', '[100 TO 499]').query_facet('foo', '[1000 TO 1499]') self.assertTrue(isinstance(sqs3, SearchQuerySet)) self.assertEqual(len(sqs3.query.query_facets), 3) def test_stats(self): sqs = self.msqs.stats_facet('foo', 'bar') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.stats), 1) sqs2 = self.msqs.stats_facet('foo', 'bar').stats_facet('foo', 'baz') self.assertTrue(isinstance(sqs2, SearchQuerySet)) self.assertEqual(len(sqs2.query.stats), 1) sqs3 = self.msqs.stats_facet('foo', 'bar').stats_facet('moof', 'baz') self.assertTrue(isinstance(sqs3, SearchQuerySet)) self.assertEqual(len(sqs3.query.stats), 2) def test_narrow(self): sqs = self.msqs.narrow('foo:moof') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.narrow_queries), 1) def test_clone(self): results = self.msqs.filter(foo='bar', foo__lt='10') clone = results._clone() self.assertTrue(isinstance(clone, SearchQuerySet)) self.assertEqual(str(clone.query), str(results.query)) self.assertEqual(clone._result_cache, []) self.assertEqual(clone._result_count, None) self.assertEqual(clone._cache_full, False) self.assertEqual(clone._using, results._using) def test_using(self): sqs = SearchQuerySet(using='default') self.assertNotEqual(sqs.query, None) self.assertEqual(sqs.query._using, 'default') def test_chaining(self): sqs = 
self.msqs.filter(content='foo') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 1) # A second instance should inherit none of the changes from above. sqs = self.msqs.filter(content='bar') self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 1) def test_none(self): sqs = self.msqs.none() self.assertTrue(isinstance(sqs, EmptySearchQuerySet)) self.assertEqual(len(sqs), 0) def test___and__(self): sqs1 = self.msqs.filter(content='foo') sqs2 = self.msqs.filter(content='bar') sqs = sqs1 & sqs2 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) def test___or__(self): sqs1 = self.msqs.filter(content='foo') sqs2 = self.msqs.filter(content='bar') sqs = sqs1 | sqs2 self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.query_filter), 2) def test_and_or(self): """ Combining AND queries with OR should give AND(OR(a, b), OR(c, d)) """ sqs1 = self.msqs.filter(content='foo').filter(content='oof') sqs2 = self.msqs.filter(content='bar').filter(content='rab') sqs = sqs1 | sqs2 self.assertEqual(sqs.query.query_filter.connector, 'OR') self.assertEqual(repr(sqs.query.query_filter.children[0]), repr(sqs1.query.query_filter)) self.assertEqual(repr(sqs.query.query_filter.children[1]), repr(sqs2.query.query_filter)) def test_or_and(self): """ Combining OR queries with AND should give OR(AND(a, b), AND(c, d)) """ sqs1 = self.msqs.filter(content='foo').filter_or(content='oof') sqs2 = self.msqs.filter(content='bar').filter_or(content='rab') sqs = sqs1 & sqs2 self.assertEqual(sqs.query.query_filter.connector, 'AND') self.assertEqual(repr(sqs.query.query_filter.children[0]), repr(sqs1.query.query_filter)) self.assertEqual(repr(sqs.query.query_filter.children[1]), repr(sqs2.query.query_filter)) class ValuesQuerySetTestCase(SearchQuerySetTestCase): def test_values_sqs(self): sqs = self.msqs.auto_query("test").values("id") 
self.assert_(isinstance(sqs, ValuesSearchQuerySet)) # We'll do a basic test to confirm that slicing works as expected: self.assert_(isinstance(sqs[0], dict)) self.assert_(isinstance(sqs[0:5][0], dict)) def test_valueslist_sqs(self): sqs = self.msqs.auto_query("test").values_list("id") self.assert_(isinstance(sqs, ValuesListSearchQuerySet)) self.assert_(isinstance(sqs[0], (list, tuple))) self.assert_(isinstance(sqs[0:1][0], (list, tuple))) self.assertRaises(TypeError, self.msqs.auto_query("test").values_list, "id", "score", flat=True) flat_sqs = self.msqs.auto_query("test").values_list("id", flat=True) self.assert_(isinstance(sqs, ValuesListSearchQuerySet)) # Note that this will actually be None because a mocked sqs lacks # anything else: self.assert_(flat_sqs[0] is None) self.assert_(flat_sqs[0:1][0] is None) class EmptySearchQuerySetTestCase(TestCase): def setUp(self): super(EmptySearchQuerySetTestCase, self).setUp() self.esqs = EmptySearchQuerySet() def test_get_count(self): self.assertEqual(self.esqs.count(), 0) self.assertEqual(len(self.esqs.all()), 0) def test_filter(self): sqs = self.esqs.filter(content='foo') self.assertTrue(isinstance(sqs, EmptySearchQuerySet)) self.assertEqual(len(sqs), 0) def test_exclude(self): sqs = self.esqs.exclude(content='foo') self.assertTrue(isinstance(sqs, EmptySearchQuerySet)) self.assertEqual(len(sqs), 0) def test_slice(self): sqs = self.esqs.filter(content='foo') self.assertTrue(isinstance(sqs, EmptySearchQuerySet)) self.assertEqual(len(sqs), 0) self.assertEqual(sqs[:10], []) try: sqs[4] self.fail() except IndexError: pass def test_dictionary_lookup(self): """ Ensure doing a dictionary lookup raises a TypeError so EmptySearchQuerySets can be used in templates. 
""" self.assertRaises(TypeError, lambda: self.esqs['count']) @unittest.skipUnless(test_pickling, 'Skipping pickling tests') @override_settings(DEBUG=True) class PickleSearchQuerySetTestCase(TestCase): fixtures = ['base_data'] def setUp(self): super(PickleSearchQuerySetTestCase, self).setUp() # Stow. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.cpkmmsi = CharPKMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi, self.cpkmmsi]) connections['default']._index = self.ui # Update the "index". backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) self.msqs = SearchQuerySet() # Stow. reset_search_queries() def tearDown(self): # Restore. connections['default']._index = self.old_unified_index super(PickleSearchQuerySetTestCase, self).tearDown() def test_pickling(self): results = self.msqs.all() for res in results: # Make sure the cache is full. pass in_a_pickle = pickle.dumps(results) like_a_cuke = pickle.loads(in_a_pickle) self.assertEqual(len(like_a_cuke), len(results)) self.assertEqual(like_a_cuke[0].id, results[0].id) django-haystack-2.8.0/test_haystack/test_templatetags.py000066400000000000000000000116521325051407000235430ustar00rootroot00000000000000# -*- coding: utf-8 -*- from __future__ import absolute_import, division, print_function, unicode_literals from django.conf import settings from django.core.exceptions import ImproperlyConfigured from django.template import Context, Template from django.test import TestCase from haystack.utils.highlighting import Highlighter class BorkHighlighter(Highlighter): def render_html(self, highlight_locations=None, start_offset=None, end_offset=None): highlighted_chunk = self.text_block[start_offset:end_offset] for word in self.query_words: highlighted_chunk = highlighted_chunk.replace(word, 'Bork!') return highlighted_chunk class TemplateTagTestCase(TestCase): def render(self, template, 
context): # Why on Earth does Django not have a TemplateTestCase yet? t = Template(template) c = Context(context) return t.render(c) class HighlightTestCase(TemplateTagTestCase): def setUp(self): super(HighlightTestCase, self).setUp() self.sample_entry = """ Registering indexes in Haystack is very similar to registering models and ModelAdmin classes in the Django admin site. If you want to override the default indexing behavior for your model you can specify your own SearchIndex class. This is useful for ensuring that future-dated or non-live content is not indexed and searchable. Every custom SearchIndex requires there be one and only one field with document=True. This is the primary field that will get passed to the backend for indexing. For this field, you'll then need to create a template at search/indexes/myapp/note_text.txt. This allows you to customize the document that will be passed to the search backend for indexing. A sample template might look like. In addition, you may specify other fields to be populated along with the document. In this case, we also index the user who authored the document as well as the date the document was published. The variable you assign the SearchField to should directly map to the field your search backend is expecting. You instantiate most search fields with a parameter that points to the attribute of the object to populate that field with. 
""" def test_simple(self): template = """{% load highlight %}{% highlight entry with query %}""" context = { 'entry': self.sample_entry, 'query': 'index', } self.assertEqual(self.render(template, context), u'...indexing behavior for your model you can specify your own SearchIndex class.\nThis is useful for ensuring that future-dated or non-live content is not indexed\nand searchable.\n\nEvery custom SearchIndex ...') template = """{% load highlight %}{% highlight entry with query html_tag "div" css_class "foo" max_length 100 %}""" context = { 'entry': self.sample_entry, 'query': 'field', } self.assertEqual(self.render(template, context), u'...
field
with\ndocument=True. This is the primary
field
that will get passed to the backend\nfor indexing...') template = """{% load highlight %}{% highlight entry with query html_tag "div" css_class "foo" max_length 100 %}""" context = { 'entry': self.sample_entry, 'query': 'Haystack', } self.assertEqual(self.render(template, context), u'...
Haystack
is very similar to registering models and\nModelAdmin classes in the Django admin site. If y...') template = """{% load highlight %}{% highlight "xxxxxxxxxxxxx foo bbxxxxx foo" with "foo" max_length 5 html_tag "span" %}""" context = {} self.assertEqual(self.render(template, context), u'...foo b...') def test_custom(self): # Stow. old_custom_highlighter = getattr(settings, 'HAYSTACK_CUSTOM_HIGHLIGHTER', None) settings.HAYSTACK_CUSTOM_HIGHLIGHTER = 'not.here.FooHighlighter' template = """{% load highlight %}{% highlight entry with query %}""" context = { 'entry': self.sample_entry, 'query': 'index', } self.assertRaises(ImproperlyConfigured, self.render, template, context) settings.HAYSTACK_CUSTOM_HIGHLIGHTER = 'test_haystack.test_templatetags.BorkHighlighter' template = """{% load highlight %}{% highlight entry with query %}""" context = { 'entry': self.sample_entry, 'query': 'index', } self.assertEqual(self.render(template, context), u'Bork!ing behavior for your model you can specify your own SearchIndex class.\nThis is useful for ensuring that future-dated or non-live content is not Bork!ed\nand searchable.\n\nEvery custom SearchIndex ') # Restore. 
settings.HAYSTACK_CUSTOM_HIGHLIGHTER = old_custom_highlighter django-haystack-2.8.0/test_haystack/test_utils.py000066400000000000000000000330661325051407000222140ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from django.test.utils import override_settings from test_haystack.core.models import MockModel from haystack.utils import _lookup_identifier_method, get_facet_field_name, get_identifier, log from haystack.utils.highlighting import Highlighter class GetIdentifierTestCase(TestCase): def test_get_facet_field_name(self): self.assertEqual(get_facet_field_name('id'), 'id') self.assertEqual(get_facet_field_name('django_id'), 'django_id') self.assertEqual(get_facet_field_name('django_ct'), 'django_ct') self.assertEqual(get_facet_field_name('author'), 'author_exact') self.assertEqual(get_facet_field_name('author_exact'), 'author_exact_exact') class GetFacetFieldNameTestCase(TestCase): fixtures = ['base_data'] def test_get_identifier(self): self.assertEqual(get_identifier('core.mockmodel.1'), 'core.mockmodel.1') # Valid object. 
mock = MockModel.objects.get(pk=1) self.assertEqual(get_identifier(mock), 'core.mockmodel.1') @override_settings(HAYSTACK_IDENTIFIER_METHOD='test_haystack.core.custom_identifier.get_identifier_method') def test_haystack_identifier_method(self): # The custom implementation returns the MD-5 hash of the key value by # default: get_identifier = _lookup_identifier_method() self.assertEqual(get_identifier('a.b.c'), '553f764f7b436175c0387e22b4a19213') # … but it also supports a custom override mechanism which would # definitely fail with the default implementation: class custom_id_class(object): def get_custom_haystack_id(self): return 'CUSTOM' self.assertEqual(get_identifier(custom_id_class()), 'CUSTOM') @override_settings(HAYSTACK_IDENTIFIER_METHOD='test_haystack.core.custom_identifier.not_there') def test_haystack_identifier_method_bad_path(self): self.assertRaises(AttributeError, _lookup_identifier_method) @override_settings(HAYSTACK_IDENTIFIER_METHOD='core.not_there.not_there') def test_haystack_identifier_method_bad_module(self): self.assertRaises(ImportError, _lookup_identifier_method) class HighlighterTestCase(TestCase): def setUp(self): super(HighlighterTestCase, self).setUp() self.document_1 = "This is a test of the highlightable words detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-air." self.document_2 = "The content of words in no particular order causes nothing to occur." self.document_3 = "%s %s" % (self.document_1, self.document_2) def test_find_highlightable_words(self): highlighter = Highlighter('this test') highlighter.text_block = self.document_1 self.assertEqual(highlighter.find_highlightable_words(), {'this': [0, 53, 79], 'test': [10, 68]}) # We don't stem for now. highlighter = Highlighter('highlight tests') highlighter.text_block = self.document_1 self.assertEqual(highlighter.find_highlightable_words(), {'highlight': [22], 'tests': []}) # Ignore negated bits. 
highlighter = Highlighter('highlight -test') highlighter.text_block = self.document_1 self.assertEqual(highlighter.find_highlightable_words(), {'highlight': [22]}) def test_find_window(self): # The query doesn't matter for this method, so ignore it. highlighter = Highlighter('') highlighter.text_block = self.document_1 # No query. self.assertEqual(highlighter.find_window({}), (0, 200)) # Nothing found. self.assertEqual(highlighter.find_window({'highlight': [], 'tests': []}), (0, 200)) # Simple cases. self.assertEqual(highlighter.find_window({'highlight': [0], 'tests': [100]}), (0, 200)) self.assertEqual(highlighter.find_window({'highlight': [99], 'tests': [199]}), (99, 299)) self.assertEqual(highlighter.find_window({'highlight': [0], 'tests': [201]}), (0, 200)) self.assertEqual(highlighter.find_window({'highlight': [203], 'tests': [120]}), (120, 320)) self.assertEqual(highlighter.find_window({'highlight': [], 'tests': [100]}), (100, 300)) self.assertEqual(highlighter.find_window({'highlight': [0], 'tests': [80], 'moof': [120]}), (0, 200)) # Simple cases, with an outlier far outside the window. self.assertEqual(highlighter.find_window({'highlight': [0], 'tests': [100, 450]}), (0, 200)) self.assertEqual(highlighter.find_window({'highlight': [100], 'tests': [220, 450]}), (100, 300)) self.assertEqual(highlighter.find_window({'highlight': [100], 'tests': [350, 450]}), (350, 550)) self.assertEqual(highlighter.find_window({'highlight': [100], 'tests': [220], 'moof': [450]}), (100, 300)) # Density checks. 
self.assertEqual(highlighter.find_window({'highlight': [0], 'tests': [100, 180, 450]}), (0, 200)) self.assertEqual(highlighter.find_window({'highlight': [0, 40], 'tests': [100, 200, 220, 450]}), (40, 240)) self.assertEqual(highlighter.find_window({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [450]}), (40, 240)) self.assertEqual(highlighter.find_window({'highlight': [0, 40], 'tests': [100, 200, 220], 'moof': [294, 299, 450]}), (100, 300)) def test_render_html(self): highlighter = Highlighter('this test') highlighter.text_block = self.document_1 self.assertEqual(highlighter.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200), 'This is a test of the highlightable words detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-air.') highlighter.text_block = self.document_2 self.assertEqual(highlighter.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200), 'The content of words in no particular order causes nothing to occur.') highlighter.text_block = self.document_3 self.assertEqual(highlighter.render_html({'this': [0, 53, 79], 'test': [10, 68]}, 0, 200), 'This is a test of the highlightable words detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...') highlighter = Highlighter('content detection') highlighter.text_block = self.document_3 self.assertEqual(highlighter.render_html({'content': [151], 'detection': [42]}, 42, 242), '...detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes nothing to occur.') self.assertEqual(highlighter.render_html({'content': [151], 'detection': [42]}, 42, 200), '...detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...') # One term found within another term. 
highlighter = Highlighter('this is') highlighter.text_block = self.document_1 self.assertEqual(highlighter.render_html({'this': [0, 53, 79], 'is': [2, 5, 55, 58, 81]}, 0, 200), 'This is a test of the highlightable words detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-air.') # Regression for repetition in the regular expression. highlighter = Highlighter('i++') highlighter.text_block = 'Foo is i++ in most cases.' self.assertEqual(highlighter.render_html({'i++': [7]}, 0, 200), 'Foo is i++ in most cases.') highlighter = Highlighter('i**') highlighter.text_block = 'Foo is i** in most cases.' self.assertEqual(highlighter.render_html({'i**': [7]}, 0, 200), 'Foo is i** in most cases.') highlighter = Highlighter('i..') highlighter.text_block = 'Foo is i.. in most cases.' self.assertEqual(highlighter.render_html({'i..': [7]}, 0, 200), 'Foo is i.. in most cases.') highlighter = Highlighter('i??') highlighter.text_block = 'Foo is i?? in most cases.' self.assertEqual(highlighter.render_html({'i??': [7]}, 0, 200), 'Foo is i?? in most cases.') # Regression for highlighting already highlighted HTML terms. highlighter = Highlighter('span') highlighter.text_block = 'A span in spam makes html in a can.' self.assertEqual(highlighter.render_html({'span': [2]}, 0, 200), 'A span in spam makes html in a can.') highlighter = Highlighter('highlight') highlighter.text_block = 'A span in spam makes highlighted html in a can.' self.assertEqual(highlighter.render_html({'highlight': [21]}, 0, 200), 'A span in spam makes highlighted html in a can.') def test_highlight(self): highlighter = Highlighter('this test') self.assertEqual(highlighter.highlight(self.document_1), u'This is a test of the highlightable words detection. This is only a test. 
Were this an actual emergency, your text would have exploded in mid-air.') self.assertEqual(highlighter.highlight(self.document_2), u'The content of words in no particular order causes nothing to occur.') self.assertEqual(highlighter.highlight(self.document_3), u'This is a test of the highlightable words detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...') highlighter = Highlighter('this test', html_tag='div', css_class=None) self.assertEqual(highlighter.highlight(self.document_1), u'
This
is a
test
of the highlightable words detection.
This
is only a
test
. Were
this
an actual emergency, your text would have exploded in mid-air.') self.assertEqual(highlighter.highlight(self.document_2), u'The content of words in no particular order causes nothing to occur.') self.assertEqual(highlighter.highlight(self.document_3), u'
This
is a
test
of the highlightable words detection.
This
is only a
test
. Were
this
an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes no...') highlighter = Highlighter('content detection') self.assertEqual(highlighter.highlight(self.document_1), u'...detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-air.') self.assertEqual(highlighter.highlight(self.document_2), u'...content of words in no particular order causes nothing to occur.') self.assertEqual(highlighter.highlight(self.document_3), u'...detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-air. The content of words in no particular order causes nothing to occur.') highlighter = Highlighter('content detection', max_length=100) self.assertEqual(highlighter.highlight(self.document_1), u'...detection. This is only a test. Were this an actual emergency, your text would have exploded in mid-...') self.assertEqual(highlighter.highlight(self.document_2), u'...content of words in no particular order causes nothing to occur.') self.assertEqual(highlighter.highlight(self.document_3), u'This is a test of the highlightable words detection. This is only a test. 
Were this an actual emerge...') class LoggingFacadeTestCase(TestCase): def test_everything_noops_if_settings_are_off(self): with self.settings(HAYSTACK_LOGGING=False): l = log.LoggingFacade(None) l.error() def test_uses_provided_logger_if_logging_is_on(self): with self.settings(HAYSTACK_LOGGING=True): l = log.LoggingFacade(None) try: l.error() except AttributeError: pass def test_uses_provided_logger_by_default(self): class Logger(object): def __init__(self): self.was_called = False def error(self): self.was_called = True l = log.LoggingFacade(Logger()) self.assertFalse(l.was_called, msg='sanity check') l.error() self.assertTrue(l.was_called) django-haystack-2.8.0/test_haystack/test_views.py000066400000000000000000000255151325051407000222110ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import time from threading import Thread from django import forms from django.http import HttpRequest, QueryDict from django.test import TestCase, override_settings from django.utils.six.moves import queue from django.urls import reverse from test_haystack.core.models import AnotherMockModel, MockModel from haystack import connections, indexes from haystack.forms import FacetedSearchForm, ModelSearchForm, SearchForm from haystack.query import EmptySearchQuerySet from haystack.utils.loading import UnifiedIndex from haystack.views import FacetedSearchView, SearchView, search_view_factory class InitialedSearchForm(SearchForm): q = forms.CharField(initial='Search for...', required=False, label='Search') class BasicMockModelSearchIndex(indexes.BasicSearchIndex, indexes.Indexable): def get_model(self): return MockModel class BasicAnotherMockModelSearchIndex(indexes.BasicSearchIndex, indexes.Indexable): def get_model(self): return AnotherMockModel class SearchViewTestCase(TestCase): fixtures = ['base_data'] def setUp(self): super(SearchViewTestCase, self).setUp() # Stow. 
self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.bammsi = BasicAnotherMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi, self.bammsi]) connections['default']._index = self.ui # Update the "index". backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) def tearDown(self): connections['default']._index = self.old_unified_index super(SearchViewTestCase, self).tearDown() def test_search_no_query(self): response = self.client.get(reverse('haystack_search')) self.assertEqual(response.status_code, 200) def test_search_query(self): response = self.client.get(reverse('haystack_search'), {'q': 'haystack'}) self.assertEqual(response.status_code, 200) self.assertIn('page', response.context) self.assertNotIn('page_obj', response.context) self.assertEqual(len(response.context[-1]['page'].object_list), 3) self.assertEqual(response.context[-1]['page'].object_list[0].content_type(), u'core.mockmodel') self.assertEqual(response.context[-1]['page'].object_list[0].pk, '1') def test_invalid_page(self): response = self.client.get(reverse('haystack_search'), {'q': 'haystack', 'page': '165233'}) self.assertEqual(response.status_code, 404) def test_empty_results(self): sv = SearchView() sv.request = HttpRequest() sv.form = sv.build_form() self.assertTrue(isinstance(sv.get_results(), EmptySearchQuerySet)) def test_initial_data(self): sv = SearchView(form_class=InitialedSearchForm) sv.request = HttpRequest() form = sv.build_form() self.assertTrue(isinstance(form, InitialedSearchForm)) self.assertEqual(form.fields['q'].initial, 'Search for...') para = form.as_p() self.assertTrue(u'' in para) self.assertTrue(u'value="Search for..."' in para) def test_pagination(self): response = self.client.get(reverse('haystack_search'), {'q': 'haystack', 'page': 0}) self.assertEqual(response.status_code, 404) response = self.client.get(reverse('haystack_search'), 
{'q': 'haystack', 'page': 1}) self.assertEqual(response.status_code, 200) self.assertEqual(len(response.context[-1]['page'].object_list), 3) response = self.client.get(reverse('haystack_search'), {'q': 'haystack', 'page': 2}) self.assertEqual(response.status_code, 404) def test_thread_safety(self): exceptions = [] def threaded_view(resp_queue, view, request): time.sleep(2) try: view(request) resp_queue.put(request.GET['name']) except Exception as e: exceptions.append(e) raise class ThreadedSearchView(SearchView): def __call__(self, request): print("Name: %s" % request.GET['name']) return super(ThreadedSearchView, self).__call__(request) view = search_view_factory(view_class=ThreadedSearchView) resp_queue = queue.Queue() request_1 = HttpRequest() request_1.GET = {'name': 'foo'} request_2 = HttpRequest() request_2.GET = {'name': 'bar'} th1 = Thread(target=threaded_view, args=(resp_queue, view, request_1)) th2 = Thread(target=threaded_view, args=(resp_queue, view, request_2)) th1.start() th2.start() th1.join() th2.join() foo = resp_queue.get() bar = resp_queue.get() self.assertNotEqual(foo, bar) def test_spelling(self): # Stow. from django.conf import settings old = settings.HAYSTACK_CONNECTIONS['default'].get('INCLUDE_SPELLING', None) settings.HAYSTACK_CONNECTIONS['default']['INCLUDE_SPELLING'] = True sv = SearchView() sv.query = 'Nothing' sv.results = [] sv.build_page = lambda: (None, None) sv.create_response() context = sv.get_context() self.assertIn('suggestion', context, msg='Spelling suggestions should be present even if' ' no results were returned') self.assertEqual(context['suggestion'], None) # Restore settings.HAYSTACK_CONNECTIONS['default']['INCLUDE_SPELLING'] = old if old is None: del settings.HAYSTACK_CONNECTIONS['default']['INCLUDE_SPELLING'] @override_settings(ROOT_URLCONF='test_haystack.results_per_page_urls') class ResultsPerPageTestCase(TestCase): fixtures = ['base_data'] def setUp(self): super(ResultsPerPageTestCase, self).setUp() # Stow. 
self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.bammsi = BasicAnotherMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi, self.bammsi]) connections['default']._index = self.ui # Update the "index". backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) def tearDown(self): connections['default']._index = self.old_unified_index super(ResultsPerPageTestCase, self).tearDown() def test_custom_results_per_page(self): response = self.client.get('/search/', {'q': 'haystack'}) self.assertEqual(response.status_code, 200) self.assertEqual(len(response.context[-1]['page'].object_list), 1) self.assertEqual(response.context[-1]['paginator'].per_page, 1) response = self.client.get('/search2/', {'q': 'hello world'}) self.assertEqual(response.status_code, 200) self.assertEqual(len(response.context[-1]['page'].object_list), 2) self.assertEqual(response.context[-1]['paginator'].per_page, 2) class FacetedSearchViewTestCase(TestCase): def setUp(self): super(FacetedSearchViewTestCase, self).setUp() # Stow. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.bammsi = BasicAnotherMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi, self.bammsi]) connections['default']._index = self.ui # Update the "index". 
backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) def tearDown(self): connections['default']._index = self.old_unified_index super(FacetedSearchViewTestCase, self).tearDown() def test_search_no_query(self): response = self.client.get(reverse('haystack_faceted_search')) self.assertEqual(response.status_code, 200) self.assertEqual(response.context['facets'], {}) def test_empty_results(self): fsv = FacetedSearchView() fsv.request = HttpRequest() fsv.request.GET = QueryDict('') fsv.form = fsv.build_form() self.assertTrue(isinstance(fsv.get_results(), EmptySearchQuerySet)) def test_default_form(self): fsv = FacetedSearchView() fsv.request = HttpRequest() fsv.request.GET = QueryDict('') fsv.form = fsv.build_form() self.assertTrue(isinstance(fsv.form, FacetedSearchForm)) def test_list_selected_facets(self): fsv = FacetedSearchView() fsv.request = HttpRequest() fsv.request.GET = QueryDict('') fsv.form = fsv.build_form() self.assertEqual(fsv.form.selected_facets, []) fsv = FacetedSearchView() fsv.request = HttpRequest() fsv.request.GET = QueryDict('selected_facets=author:daniel&selected_facets=author:chris') fsv.form = fsv.build_form() self.assertEqual(fsv.form.selected_facets, [u'author:daniel', u'author:chris']) class BasicSearchViewTestCase(TestCase): fixtures = ['base_data'] def setUp(self): super(BasicSearchViewTestCase, self).setUp() # Stow. self.old_unified_index = connections['default']._index self.ui = UnifiedIndex() self.bmmsi = BasicMockModelSearchIndex() self.bammsi = BasicAnotherMockModelSearchIndex() self.ui.build(indexes=[self.bmmsi, self.bammsi]) connections['default']._index = self.ui # Update the "index". 
backend = connections['default'].get_backend() backend.clear() backend.update(self.bmmsi, MockModel.objects.all()) def tearDown(self): connections['default']._index = self.old_unified_index super(BasicSearchViewTestCase, self).tearDown() def test_search_no_query(self): response = self.client.get(reverse('haystack_basic_search')) self.assertEqual(response.status_code, 200) def test_search_query(self): response = self.client.get(reverse('haystack_basic_search'), {'q': 'haystack'}) self.assertEqual(response.status_code, 200) self.assertEqual(type(response.context[-1]['form']), ModelSearchForm) self.assertEqual(len(response.context[-1]['page'].object_list), 3) self.assertEqual(response.context[-1]['page'].object_list[0].content_type(), u'core.mockmodel') self.assertEqual(response.context[-1]['page'].object_list[0].pk, '1') self.assertEqual(response.context[-1]['query'], u'haystack') def test_invalid_page(self): response = self.client.get(reverse('haystack_basic_search'), {'q': 'haystack', 'page': '165233'}) self.assertEqual(response.status_code, 404) django-haystack-2.8.0/test_haystack/utils.py000066400000000000000000000010471325051407000211470ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import unittest from django.conf import settings def check_solr(using='solr'): try: from pysolr import Solr, SolrError except ImportError: raise unittest.SkipTest("pysolr not installed.") solr = Solr(settings.HAYSTACK_CONNECTIONS[using]['URL']) try: solr.search('*:*') except SolrError as e: raise unittest.SkipTest("solr not running on %r" % settings.HAYSTACK_CONNECTIONS[using]['URL'], e) django-haystack-2.8.0/test_haystack/whoosh_tests/000077500000000000000000000000001325051407000221645ustar00rootroot00000000000000django-haystack-2.8.0/test_haystack/whoosh_tests/__init__.py000066400000000000000000000001141325051407000242710ustar00rootroot00000000000000# encoding: utf-8 import warnings 
warnings.simplefilter('ignore', Warning) django-haystack-2.8.0/test_haystack/whoosh_tests/test_forms.py000066400000000000000000000025671325051407000247350ustar00rootroot00000000000000# encoding: utf-8 """Tests for Whoosh spelling suggestions""" from __future__ import absolute_import, division, print_function, unicode_literals from django.conf import settings from django.http import HttpRequest from haystack.forms import SearchForm from haystack.query import SearchQuerySet from haystack.views import SearchView from .test_whoosh_backend import LiveWhooshRoundTripTestCase class SpellingSuggestionTestCase(LiveWhooshRoundTripTestCase): fixtures = ['base_data'] def setUp(self): self.old_spelling_setting = settings.HAYSTACK_CONNECTIONS['whoosh'].get('INCLUDE_SPELLING', False) settings.HAYSTACK_CONNECTIONS['whoosh']['INCLUDE_SPELLING'] = True super(SpellingSuggestionTestCase, self).setUp() def tearDown(self): settings.HAYSTACK_CONNECTIONS['whoosh']['INCLUDE_SPELLING'] = self.old_spelling_setting super(SpellingSuggestionTestCase, self).tearDown() def test_form_suggestion(self): form = SearchForm({'q': 'exampl'}, searchqueryset=SearchQuerySet('whoosh')) self.assertEqual(form.get_suggestion(), 'example') def test_view_suggestion(self): view = SearchView(template='test_suggestion.html', searchqueryset=SearchQuerySet('whoosh')) mock = HttpRequest() mock.GET['q'] = 'exampl' resp = view(mock) self.assertEqual(resp.content, b'Suggestion: example') django-haystack-2.8.0/test_haystack/whoosh_tests/test_inputs.py000066400000000000000000000064761325051407000251340ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals from django.test import TestCase from haystack import connections, inputs class WhooshInputTestCase(TestCase): def setUp(self): super(WhooshInputTestCase, self).setUp() self.query_obj = connections['whoosh'].get_query() def test_raw_init(self): raw = inputs.Raw('hello OR there, :you') 
self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {}) self.assertEqual(raw.post_process, False) raw = inputs.Raw('hello OR there, :you', test='really') self.assertEqual(raw.query_string, 'hello OR there, :you') self.assertEqual(raw.kwargs, {'test': 'really'}) self.assertEqual(raw.post_process, False) def test_raw_prepare(self): raw = inputs.Raw('hello OR there, :you') self.assertEqual(raw.prepare(self.query_obj), 'hello OR there, :you') def test_clean_init(self): clean = inputs.Clean('hello OR there, :you') self.assertEqual(clean.query_string, 'hello OR there, :you') self.assertEqual(clean.post_process, True) def test_clean_prepare(self): clean = inputs.Clean('hello OR there, :you') self.assertEqual(clean.prepare(self.query_obj), "hello or there, ':you'") def test_exact_init(self): exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.query_string, 'hello OR there, :you') self.assertEqual(exact.post_process, True) def test_exact_prepare(self): exact = inputs.Exact('hello OR there, :you') self.assertEqual(exact.prepare(self.query_obj), u'"hello OR there, :you"') def test_not_init(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.query_string, 'hello OR there, :you') self.assertEqual(not_it.post_process, True) def test_not_prepare(self): not_it = inputs.Not('hello OR there, :you') self.assertEqual(not_it.prepare(self.query_obj), u"NOT (hello or there, ':you')") def test_autoquery_init(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.query_string, 'panic -don\'t "froody dude"') self.assertEqual(autoquery.post_process, False) def test_autoquery_prepare(self): autoquery = inputs.AutoQuery('panic -don\'t "froody dude"') self.assertEqual(autoquery.prepare(self.query_obj), u'panic NOT don\'t "froody dude"') def test_altparser_init(self): altparser = inputs.AltParser('dismax') self.assertEqual(altparser.parser_name, 'dismax') 
self.assertEqual(altparser.query_string, '') self.assertEqual(altparser.kwargs, {}) self.assertEqual(altparser.post_process, False) altparser = inputs.AltParser('dismax', 'douglas adams', qf='author', mm=1) self.assertEqual(altparser.parser_name, 'dismax') self.assertEqual(altparser.query_string, 'douglas adams') self.assertEqual(altparser.kwargs, {'mm': 1, 'qf': 'author'}) self.assertEqual(altparser.post_process, False) def test_altparser_prepare(self): altparser = inputs.AltParser('hello OR there, :you') # Not supported on that backend. self.assertEqual(altparser.prepare(self.query_obj), '') django-haystack-2.8.0/test_haystack/whoosh_tests/test_whoosh_backend.py000066400000000000000000001306601325051407000265610ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import os import unittest from datetime import timedelta from decimal import Decimal from django.conf import settings from django.test import TestCase from django.test.utils import override_settings from django.utils.datetime_safe import date, datetime from whoosh.fields import BOOLEAN, DATETIME, KEYWORD, NUMERIC, TEXT from whoosh.qparser import QueryParser from haystack import connections, indexes, reset_search_queries from haystack.exceptions import SearchBackendError, SkipDocument from haystack.inputs import AutoQuery from haystack.models import SearchResult from haystack.query import SQ, SearchQuerySet from haystack.utils.loading import UnifiedIndex from ..core.models import AFourthMockModel, AnotherMockModel, MockModel from ..mocks import MockSearchResult from .testcases import WhooshTestCase class WhooshMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel class WhooshMockSearchIndexWithSkipDocument(WhooshMockSearchIndex): 
def prepare_text(self, obj): if obj.author == 'daniel3': raise SkipDocument return obj.author class WhooshAnotherMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return AnotherMockModel def prepare_text(self, obj): return obj.author class AllTypesWhooshMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True) name = indexes.CharField(model_attr='author', indexed=False) pub_date = indexes.DateTimeField(model_attr='pub_date') sites = indexes.MultiValueField() seen_count = indexes.IntegerField(indexed=False) is_active = indexes.BooleanField(default=True) def get_model(self): return MockModel class WhooshMaintainTypeMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True) month = indexes.CharField(indexed=False) pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return MockModel def prepare_text(self, obj): return "Indexed!\n%s" % obj.pk def prepare_month(self, obj): return "%02d" % obj.pub_date.month class WhooshBoostMockSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField( document=True, use_template=True, template_name='search/indexes/core/mockmodel_template.txt' ) author = indexes.CharField(model_attr='author', weight=2.0) editor = indexes.CharField(model_attr='editor') pub_date = indexes.DateTimeField(model_attr='pub_date') def get_model(self): return AFourthMockModel def prepare(self, obj): data = super(WhooshBoostMockSearchIndex, self).prepare(obj) if obj.pk % 2 == 0: data['boost'] = 2.0 return data class WhooshAutocompleteMockModelSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(model_attr='foo', document=True) name = indexes.CharField(model_attr='author') pub_date = indexes.DateTimeField(model_attr='pub_date') 
text_auto = indexes.EdgeNgramField(model_attr='foo') name_auto = indexes.EdgeNgramField(model_attr='author') def get_model(self): return MockModel class WhooshSearchBackendTestCase(WhooshTestCase): fixtures = ['bulk_data.json'] def setUp(self): super(WhooshSearchBackendTestCase, self).setUp() self.old_ui = connections['whoosh'].get_unified_index() self.ui = UnifiedIndex() self.wmmi = WhooshMockSearchIndex() self.wmmidni = WhooshMockSearchIndexWithSkipDocument() self.wmtmmi = WhooshMaintainTypeMockSearchIndex() self.ui.build(indexes=[self.wmmi]) self.sb = connections['whoosh'].get_backend() connections['whoosh']._index = self.ui self.sb.setup() self.raw_whoosh = self.sb.index self.parser = QueryParser(self.sb.content_field_name, schema=self.sb.schema) self.sb.delete_index() self.sample_objs = MockModel.objects.all() def tearDown(self): connections['whoosh']._index = self.old_ui super(WhooshSearchBackendTestCase, self).tearDown() def whoosh_search(self, query): self.raw_whoosh = self.raw_whoosh.refresh() searcher = self.raw_whoosh.searcher() return searcher.search(self.parser.parse(query), limit=1000) def test_non_silent(self): bad_sb = connections['whoosh'].backend('bad', PATH='/tmp/bad_whoosh', SILENTLY_FAIL=False) bad_sb.use_file_storage = False bad_sb.storage = 'omg.wtf.bbq' try: bad_sb.update(self.wmmi, self.sample_objs) self.fail() except: pass try: bad_sb.remove('core.mockmodel.1') self.fail() except: pass try: bad_sb.clear() self.fail() except: pass try: bad_sb.search('foo') self.fail() except: pass def test_update(self): self.sb.update(self.wmmi, self.sample_objs) # Check what Whoosh thinks is there. self.assertEqual(len(self.whoosh_search(u'*')), 23) self.assertEqual([doc.fields()['id'] for doc in self.whoosh_search(u'*')], [u'core.mockmodel.%s' % i for i in range(1, 24)]) def test_update_with_SkipDocument_raised(self): self.sb.update(self.wmmidni, self.sample_objs) # Check what Whoosh thinks is there. 
res = self.whoosh_search(u'*') self.assertEqual(len(res), 14) ids = [1, 2, 5, 6, 7, 8, 9, 11, 12, 14, 15, 18, 20, 21] self.assertListEqual( [doc.fields()['id'] for doc in res], [u'core.mockmodel.%s' % i for i in ids] ) def test_remove(self): self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(self.sb.index.doc_count(), 23) self.sb.remove(self.sample_objs[0]) self.assertEqual(self.sb.index.doc_count(), 22) def test_clear(self): self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(self.sb.index.doc_count(), 23) self.sb.clear() self.assertEqual(self.sb.index.doc_count(), 0) self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(self.sb.index.doc_count(), 23) self.sb.clear([AnotherMockModel]) self.assertEqual(self.sb.index.doc_count(), 23) self.sb.clear([MockModel]) self.assertEqual(self.sb.index.doc_count(), 0) self.sb.index.refresh() self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(self.sb.index.doc_count(), 23) self.sb.clear([AnotherMockModel, MockModel]) self.assertEqual(self.raw_whoosh.doc_count(), 0) def test_search(self): self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(len(self.whoosh_search(u'*')), 23) # No query string should always yield zero results. self.assertEqual(self.sb.search(u''), {'hits': 0, 'results': []}) # A one letter query string gets nabbed by a stopwords filter. Should # always yield zero results. self.assertEqual(self.sb.search(u'a'), {'hits': 0, 'results': []}) # Possible AttributeError? 
# self.assertEqual(self.sb.search(u'a b'), {'hits': 0, 'results': [], 'spelling_suggestion': '', 'facets': {}}) self.assertEqual(self.sb.search(u'*')['hits'], 23) self.assertEqual([result.pk for result in self.sb.search(u'*')['results']], [u'%s' % i for i in range(1, 24)]) self.assertEqual(self.sb.search(u'Indexe')['hits'], 23) self.assertEqual(self.sb.search(u'Indexe')['spelling_suggestion'], u'indexed') self.assertEqual(self.sb.search(u'', facets=['name']), {'hits': 0, 'results': []}) results = self.sb.search(u'Index*', facets=['name']) results = self.sb.search(u'index*', facets=['name']) self.assertEqual(results['hits'], 23) self.assertEqual(results['facets'], {}) self.assertEqual(self.sb.search(u'', date_facets={'pub_date': {'start_date': date(2008, 2, 26), 'end_date': date(2008, 2, 26), 'gap': '/MONTH'}}), {'hits': 0, 'results': []}) results = self.sb.search(u'Index*', date_facets={'pub_date': {'start_date': date(2008, 2, 26), 'end_date': date(2008, 2, 26), 'gap': '/MONTH'}}) results = self.sb.search(u'index*', date_facets={'pub_date': {'start_date': date(2008, 2, 26), 'end_date': date(2008, 2, 26), 'gap': '/MONTH'}}) self.assertEqual(results['hits'], 23) self.assertEqual(results['facets'], {}) self.assertEqual(self.sb.search(u'', query_facets={'name': '[* TO e]'}), {'hits': 0, 'results': []}) results = self.sb.search(u'Index*', query_facets={'name': '[* TO e]'}) results = self.sb.search(u'index*', query_facets={'name': '[* TO e]'}) self.assertEqual(results['hits'], 23) self.assertEqual(results['facets'], {}) # self.assertEqual(self.sb.search('', narrow_queries=set(['name:daniel1'])), {'hits': 0, 'results': []}) # results = self.sb.search('Index*', narrow_queries=set(['name:daniel1'])) # self.assertEqual(results['hits'], 1) # Ensure that swapping the ``result_class`` works. self.assertTrue(isinstance(self.sb.search(u'Index*', result_class=MockSearchResult)['results'][0], MockSearchResult)) # Check the use of ``limit_to_registered_models``. 
self.assertEqual(self.sb.search(u'', limit_to_registered_models=False), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search(u'*', limit_to_registered_models=False)['hits'], 23) self.assertEqual([result.pk for result in self.sb.search(u'*', limit_to_registered_models=False)['results']], [u'%s' % i for i in range(1, 24)]) # Stow. old_limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True) settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False self.assertEqual(self.sb.search(u''), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search(u'*')['hits'], 23) self.assertEqual([result.pk for result in self.sb.search(u'*')['results']], [u'%s' % i for i in range(1, 24)]) # Restore. settings.HAYSTACK_LIMIT_TO_REGISTERED_MODELS = old_limit_to_registered_models def test_highlight(self): self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(len(self.whoosh_search(u'*')), 23) self.assertEqual(self.sb.search(u'', highlight=True), {'hits': 0, 'results': []}) self.assertEqual(self.sb.search(u'index*', highlight=True)['hits'], 23) query = self.sb.search('Index*', highlight=True)['results'] result = [result.highlighted['text'][0] for result in query] self.assertEqual(result, ['Indexed!\n%d' % i for i in range(1, 24)]) def test_search_all_models(self): wamsi = WhooshAnotherMockSearchIndex() self.ui.build(indexes=[self.wmmi, wamsi]) self.sb.update(self.wmmi, self.sample_objs) self.sb.update(wamsi, AnotherMockModel.objects.all()) self.assertEqual(len(self.whoosh_search(u'*')), 25) self.ui.build(indexes=[self.wmmi]) def test_more_like_this(self): self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(len(self.whoosh_search(u'*')), 23) # Now supported by Whoosh (as of 1.8.4). See the ``LiveWhooshMoreLikeThisTestCase``. self.assertEqual(self.sb.more_like_this(self.sample_objs[0])['hits'], 22) # Make sure that swapping the ``result_class`` doesn't blow up. 
try: self.sb.more_like_this(self.sample_objs[0], result_class=MockSearchResult) except: self.fail() def test_delete_index(self): self.sb.update(self.wmmi, self.sample_objs) self.assertTrue(self.sb.index.doc_count() > 0) self.sb.delete_index() self.assertEqual(self.sb.index.doc_count(), 0) def test_order_by(self): self.sb.update(self.wmmi, self.sample_objs) results = self.sb.search(u'*', sort_by=['pub_date']) self.assertEqual([result.pk for result in results['results']], [u'1', u'3', u'2', u'4', u'5', u'6', u'7', u'8', u'9', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18', u'19', u'20', u'21', u'22', u'23']) results = self.sb.search(u'*', sort_by=['-pub_date']) self.assertEqual([result.pk for result in results['results']], [u'23', u'22', u'21', u'20', u'19', u'18', u'17', u'16', u'15', u'14', u'13', u'12', u'11', u'10', u'9', u'8', u'7', u'6', u'5', u'4', u'2', u'3', u'1']) results = self.sb.search(u'*', sort_by=['id']) self.assertEqual([result.pk for result in results['results']], [u'1', u'10', u'11', u'12', u'13', u'14', u'15', u'16', u'17', u'18', u'19', u'2', u'20', u'21', u'22', u'23', u'3', u'4', u'5', u'6', u'7', u'8', u'9']) results = self.sb.search(u'*', sort_by=['-id']) self.assertEqual([result.pk for result in results['results']], [u'9', u'8', u'7', u'6', u'5', u'4', u'3', u'23', u'22', u'21', u'20', u'2', u'19', u'18', u'17', u'16', u'15', u'14', u'13', u'12', u'11', u'10', u'1']) results = self.sb.search(u'*', sort_by=['-pub_date', '-id']) self.assertEqual([result.pk for result in results['results']], [u'23', u'22', u'21', u'20', u'19', u'18', u'17', u'16', u'15', u'14', u'13', u'12', u'11', u'10', u'9', u'8', u'7', u'6', u'5', u'4', u'2', u'3', u'1']) self.assertRaises(SearchBackendError, self.sb.search, u'*', sort_by=['-pub_date', 'id']) def test__from_python(self): self.assertEqual(self.sb._from_python('abc'), u'abc') self.assertEqual(self.sb._from_python(1), 1) self.assertEqual(self.sb._from_python(2653), 2653) 
self.assertEqual(self.sb._from_python(25.5), 25.5) self.assertEqual(self.sb._from_python([1, 2, 3]), u'1,2,3') self.assertTrue("a': 1" in self.sb._from_python({'a': 1, 'c': 3, 'b': 2})) self.assertEqual(self.sb._from_python(datetime(2009, 5, 9, 16, 14)), datetime(2009, 5, 9, 16, 14)) self.assertEqual(self.sb._from_python(datetime(2009, 5, 9, 0, 0)), datetime(2009, 5, 9, 0, 0)) self.assertEqual(self.sb._from_python(datetime(1899, 5, 18, 0, 0)), datetime(1899, 5, 18, 0, 0)) self.assertEqual(self.sb._from_python(datetime(2009, 5, 18, 1, 16, 30, 250)), datetime(2009, 5, 18, 1, 16, 30, 250)) def test__to_python(self): self.assertEqual(self.sb._to_python('abc'), 'abc') self.assertEqual(self.sb._to_python('1'), 1) self.assertEqual(self.sb._to_python('2653'), 2653) self.assertEqual(self.sb._to_python('25.5'), 25.5) self.assertEqual(self.sb._to_python('[1, 2, 3]'), [1, 2, 3]) self.assertEqual(self.sb._to_python('{"a": 1, "b": 2, "c": 3}'), {'a': 1, 'c': 3, 'b': 2}) self.assertEqual(self.sb._to_python('2009-05-09T16:14:00'), datetime(2009, 5, 9, 16, 14)) self.assertEqual(self.sb._to_python('2009-05-09T00:00:00'), datetime(2009, 5, 9, 0, 0)) self.assertEqual(self.sb._to_python(None), None) def test_range_queries(self): self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(len(self.whoosh_search(u'[d TO]')), 23) self.assertEqual(len(self.whoosh_search(u'name:[d TO]')), 23) self.assertEqual(len(self.whoosh_search(u'Ind* AND name:[d to]')), 23) self.assertEqual(len(self.whoosh_search(u'Ind* AND name:[to c]')), 0) def test_date_queries(self): self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(len(self.whoosh_search(u"pub_date:20090717003000")), 1) self.assertEqual(len(self.whoosh_search(u"pub_date:20090717000000")), 0) self.assertEqual(len(self.whoosh_search(u'Ind* AND pub_date:[to 20090717003000]')), 3) def test_escaped_characters_queries(self): self.sb.update(self.wmmi, self.sample_objs) self.assertEqual(len(self.whoosh_search(u"Indexed\!")), 23) 
self.assertEqual(len(self.whoosh_search(u"http\:\/\/www\.example\.com")), 0) def test_build_schema(self): ui = UnifiedIndex() ui.build(indexes=[AllTypesWhooshMockSearchIndex()]) (content_field_name, schema) = self.sb.build_schema(ui.all_searchfields()) self.assertEqual(content_field_name, 'text') schema_names = set(schema.names()) required_schema = {'django_ct', 'django_id', 'id', 'is_active', 'name', 'pub_date', 'seen_count', 'sites', 'text'} self.assertTrue(required_schema.issubset(schema_names)) self.assertIsInstance(schema._fields['text'], TEXT) self.assertIsInstance(schema._fields['pub_date'], DATETIME) self.assertIsInstance(schema._fields['seen_count'], NUMERIC) self.assertIsInstance(schema._fields['sites'], KEYWORD) self.assertIsInstance(schema._fields['is_active'], BOOLEAN) def test_verify_type(self): old_ui = connections['whoosh'].get_unified_index() ui = UnifiedIndex() wmtmmi = WhooshMaintainTypeMockSearchIndex() ui.build(indexes=[wmtmmi]) connections['whoosh']._index = ui sb = connections['whoosh'].get_backend() sb.setup() sb.update(wmtmmi, self.sample_objs) self.assertEqual(sb.search(u'*')['hits'], 23) self.assertEqual([result.month for result in sb.search(u'*')['results']], [u'06', u'07', u'06', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07', u'07']) connections['whoosh']._index = old_ui @unittest.skipIf(settings.HAYSTACK_CONNECTIONS['whoosh'].get('STORAGE') != 'file', 'testing writability requires Whoosh to use STORAGE=file') def test_writable(self): if not os.path.exists(settings.HAYSTACK_CONNECTIONS['whoosh']['PATH']): os.makedirs(settings.HAYSTACK_CONNECTIONS['whoosh']['PATH']) os.chmod(settings.HAYSTACK_CONNECTIONS['whoosh']['PATH'], 0o400) try: self.sb.setup() self.fail() except IOError: # Yay. 
We failed pass os.chmod(settings.HAYSTACK_CONNECTIONS['whoosh']['PATH'], 0o755) def test_slicing(self): self.sb.update(self.wmmi, self.sample_objs) page_1 = self.sb.search(u'*', start_offset=0, end_offset=20) page_2 = self.sb.search(u'*', start_offset=20, end_offset=30) self.assertEqual(len(page_1['results']), 20) self.assertEqual([result.pk for result in page_1['results']], [u'%s' % i for i in range(1, 21)]) self.assertEqual(len(page_2['results']), 3) self.assertEqual([result.pk for result in page_2['results']], [u'21', u'22', u'23']) # This used to throw an error. page_0 = self.sb.search(u'*', start_offset=0, end_offset=0) self.assertEqual(len(page_0['results']), 1) @unittest.expectedFailure def test_scoring(self): self.sb.update(self.wmmi, self.sample_objs) page_1 = self.sb.search(u'index', start_offset=0, end_offset=20) page_2 = self.sb.search(u'index', start_offset=20, end_offset=30) self.assertEqual(len(page_1['results']), 20) self.assertEqual(["%0.2f" % result.score for result in page_1['results']], ['0.51', '0.51', '0.51', '0.51', '0.51', '0.51', '0.51', '0.51', '0.51', '0.40', '0.40', '0.40', '0.40', '0.40', '0.40', '0.40', '0.40', '0.40', '0.40', '0.40']) self.assertEqual(len(page_2['results']), 3) self.assertEqual(["%0.2f" % result.score for result in page_2['results']], ['0.40', '0.40', '0.40']) class WhooshBoostBackendTestCase(WhooshTestCase): def setUp(self): super(WhooshBoostBackendTestCase, self).setUp() self.old_ui = connections['whoosh'].get_unified_index() self.ui = UnifiedIndex() self.wmmi = WhooshBoostMockSearchIndex() self.ui.build(indexes=[self.wmmi]) self.sb = connections['whoosh'].get_backend() connections['whoosh']._index = self.ui self.sb.setup() self.raw_whoosh = self.sb.index self.parser = QueryParser(self.sb.content_field_name, schema=self.sb.schema) self.sb.delete_index() self.sample_objs = [] for i in range(1, 5): mock = AFourthMockModel() mock.id = i if i % 2: mock.author = 'daniel' mock.editor = 'david' else: mock.author = 'david' 
mock.editor = 'daniel' mock.pub_date = date(2009, 2, 25) - timedelta(days=i) self.sample_objs.append(mock) def tearDown(self): connections['whoosh']._index = self.ui super(WhooshBoostBackendTestCase, self).tearDown() @unittest.expectedFailure def test_boost(self): self.sb.update(self.wmmi, self.sample_objs) self.raw_whoosh = self.raw_whoosh.refresh() searcher = self.raw_whoosh.searcher() self.assertEqual(len(searcher.search(self.parser.parse(u'*'), limit=1000)), 2) results = SearchQuerySet('whoosh').filter(SQ(author='daniel') | SQ(editor='daniel')) self.assertEqual([result.id for result in results], [ 'core.afourthmockmodel.1', 'core.afourthmockmodel.3', ]) self.assertEqual(results[0].boost, 1.1) class LiveWhooshSearchQueryTestCase(WhooshTestCase): def setUp(self): super(LiveWhooshSearchQueryTestCase, self).setUp() # Stow. self.old_ui = connections['whoosh'].get_unified_index() self.ui = UnifiedIndex() self.wmmi = WhooshMockSearchIndex() self.wmtmmi = WhooshMaintainTypeMockSearchIndex() self.ui.build(indexes=[self.wmmi]) self.sb = connections['whoosh'].get_backend() connections['whoosh']._index = self.ui self.sb.setup() self.raw_whoosh = self.sb.index self.parser = QueryParser(self.sb.content_field_name, schema=self.sb.schema) self.sb.delete_index() self.sample_objs = [] for i in range(1, 4): mock = MockModel() mock.id = i mock.author = 'daniel%s' % i mock.pub_date = date(2009, 2, 25) - timedelta(days=i) self.sample_objs.append(mock) self.sq = connections['whoosh'].get_query() def tearDown(self): connections['whoosh']._index = self.old_ui super(LiveWhooshSearchQueryTestCase, self).tearDown() def test_get_spelling(self): self.sb.update(self.wmmi, self.sample_objs) self.sq.add_filter(SQ(content='Indexe')) self.assertEqual(self.sq.get_spelling_suggestion(), u'indexed') def test_log_query(self): from django.conf import settings reset_search_queries() self.assertEqual(len(connections['whoosh'].queries), 0) # Stow. 
with self.settings(DEBUG=False): len(self.sq.get_results()) self.assertEqual(len(connections['whoosh'].queries), 0) with self.settings(DEBUG=True): # Redefine it to clear out the cached results. self.sq = connections['whoosh'].get_query() self.sq.add_filter(SQ(name='bar')) len(self.sq.get_results()) self.assertEqual(len(connections['whoosh'].queries), 1) self.assertEqual(connections['whoosh'].queries[0]['query_string'], 'name:(bar)') # And again, for good measure. self.sq = connections['whoosh'].get_query() self.sq.add_filter(SQ(name='baz')) self.sq.add_filter(SQ(text='foo')) len(self.sq.get_results()) self.assertEqual(len(connections['whoosh'].queries), 2) self.assertEqual(connections['whoosh'].queries[0]['query_string'], 'name:(bar)') self.assertEqual(connections['whoosh'].queries[1]['query_string'], u'(name:(baz) AND text:(foo))') @override_settings(DEBUG=True) class LiveWhooshSearchQuerySetTestCase(WhooshTestCase): def setUp(self): super(LiveWhooshSearchQuerySetTestCase, self).setUp() # Stow. 
self.old_ui = connections['whoosh'].get_unified_index() self.ui = UnifiedIndex() self.wmmi = WhooshMockSearchIndex() self.ui.build(indexes=[self.wmmi]) self.sb = connections['whoosh'].get_backend() connections['whoosh']._index = self.ui self.sb.setup() self.raw_whoosh = self.sb.index self.parser = QueryParser(self.sb.content_field_name, schema=self.sb.schema) self.sb.delete_index() self.sample_objs = [] for i in range(1, 4): mock = MockModel() mock.id = i mock.author = 'daniel%s' % i mock.pub_date = date(2009, 2, 25) - timedelta(days=i) self.sample_objs.append(mock) self.sq = connections['whoosh'].get_query() self.sqs = SearchQuerySet('whoosh') def tearDown(self): connections['whoosh']._index = self.old_ui super(LiveWhooshSearchQuerySetTestCase, self).tearDown() def test_various_searchquerysets(self): self.sb.update(self.wmmi, self.sample_objs) sqs = self.sqs.filter(content='Index') self.assertEqual(sqs.query.build_query(), u'(Index)') self.assertEqual(len(sqs), 3) sqs = self.sqs.auto_query('Indexed!') self.assertEqual(sqs.query.build_query(), u"('Indexed!')") self.assertEqual(len(sqs), 3) sqs = self.sqs.auto_query('Indexed!').filter(pub_date__lte=date(2009, 8, 31)) self.assertEqual(sqs.query.build_query(), u"(('Indexed!') AND pub_date:([to 20090831000000]))") self.assertEqual(len(sqs), 3) sqs = self.sqs.auto_query('Indexed!').filter(pub_date__lte=date(2009, 2, 23)) self.assertEqual(sqs.query.build_query(), u"(('Indexed!') AND pub_date:([to 20090223000000]))") self.assertEqual(len(sqs), 2) sqs = self.sqs.auto_query('Indexed!').filter(pub_date__lte=date(2009, 2, 25)).filter(django_id__in=[1, 2]).exclude(name='daniel1') self.assertEqual(sqs.query.build_query(), u'((\'Indexed!\') AND pub_date:([to 20090225000000]) AND django_id:(1 OR 2) AND NOT (name:(daniel1)))') self.assertEqual(len(sqs), 1) sqs = self.sqs.auto_query('re-inker') self.assertEqual(sqs.query.build_query(), u"('re-inker')") self.assertEqual(len(sqs), 0) sqs = self.sqs.auto_query('0.7 wire') 
self.assertEqual(sqs.query.build_query(), u"('0.7' wire)") self.assertEqual(len(sqs), 0) sqs = self.sqs.auto_query("daler-rowney pearlescent 'bell bronze'") self.assertEqual(sqs.query.build_query(), u"('daler-rowney' pearlescent 'bell bronze')") self.assertEqual(len(sqs), 0) sqs = self.sqs.models(MockModel) self.assertEqual(sqs.query.build_query(), u'*') self.assertEqual(len(sqs), 3) def test_all_regression(self): sqs = SearchQuerySet('whoosh') self.assertEqual([result.pk for result in sqs], []) self.sb.update(self.wmmi, self.sample_objs) self.assertTrue(self.sb.index.doc_count() > 0) sqs = SearchQuerySet('whoosh') self.assertEqual(len(sqs), 3) self.assertEqual(sorted([result.pk for result in sqs]), [u'1', u'2', u'3']) try: sqs = repr(SearchQuerySet('whoosh')) except: self.fail() def test_regression_space_query(self): self.sb.update(self.wmmi, self.sample_objs) self.assertTrue(self.sb.index.doc_count() > 0) sqs = SearchQuerySet('whoosh').auto_query(" ") self.assertEqual(len(sqs), 3) sqs = SearchQuerySet('whoosh').filter(content=" ") self.assertEqual(len(sqs), 0) def test_iter(self): self.sb.update(self.wmmi, self.sample_objs) reset_search_queries() self.assertEqual(len(connections['whoosh'].queries), 0) sqs = self.sqs.auto_query('Indexed!') results = [int(result.pk) for result in iter(sqs)] self.assertEqual(sorted(results), [1, 2, 3]) self.assertEqual(len(connections['whoosh'].queries), 1) def test_slice(self): self.sb.update(self.wmmi, self.sample_objs) reset_search_queries() self.assertEqual(len(connections['whoosh'].queries), 0) results = self.sqs.auto_query('Indexed!') self.assertEqual(sorted([int(result.pk) for result in results[1:3]]), [1, 2]) self.assertEqual(len(connections['whoosh'].queries), 1) reset_search_queries() self.assertEqual(len(connections['whoosh'].queries), 0) results = self.sqs.auto_query('Indexed!') self.assertEqual(int(results[0].pk), 1) self.assertEqual(len(connections['whoosh'].queries), 1) def test_values_slicing(self): 
self.sb.update(self.wmmi, self.sample_objs) reset_search_queries() self.assertEqual(len(connections['whoosh'].queries), 0) # TODO: this would be a good candidate for refactoring into a TestCase subclass shared across backends # The values will come back as strings because Hasytack doesn't assume PKs are integers. # We'll prepare this set once since we're going to query the same results in multiple ways: expected_pks = ['3', '2', '1'] results = self.sqs.all().order_by('pub_date').values('pk') self.assertListEqual([i['pk'] for i in results[1:11]], expected_pks) results = self.sqs.all().order_by('pub_date').values_list('pk') self.assertListEqual([i[0] for i in results[1:11]], expected_pks) results = self.sqs.all().order_by('pub_date').values_list('pk', flat=True) self.assertListEqual(results[1:11], expected_pks) self.assertEqual(len(connections['whoosh'].queries), 3) def test_manual_iter(self): self.sb.update(self.wmmi, self.sample_objs) results = self.sqs.auto_query('Indexed!') reset_search_queries() self.assertEqual(len(connections['whoosh'].queries), 0) results = [int(result.pk) for result in results._manual_iter()] self.assertEqual(sorted(results), [1, 2, 3]) self.assertEqual(len(connections['whoosh'].queries), 1) def test_fill_cache(self): self.sb.update(self.wmmi, self.sample_objs) reset_search_queries() self.assertEqual(len(connections['whoosh'].queries), 0) results = self.sqs.auto_query('Indexed!') self.assertEqual(len(results._result_cache), 0) self.assertEqual(len(connections['whoosh'].queries), 0) results._fill_cache(0, 10) self.assertEqual(len([result for result in results._result_cache if result is not None]), 3) self.assertEqual(len(connections['whoosh'].queries), 1) results._fill_cache(10, 20) self.assertEqual(len([result for result in results._result_cache if result is not None]), 3) self.assertEqual(len(connections['whoosh'].queries), 2) def test_cache_is_full(self): self.sb.update(self.wmmi, self.sample_objs) reset_search_queries() 
self.assertEqual(len(connections['whoosh'].queries), 0) self.assertEqual(self.sqs._cache_is_full(), False) results = self.sqs.auto_query('Indexed!') result_list = [i for i in iter(results)] self.assertEqual(results._cache_is_full(), True) self.assertEqual(len(connections['whoosh'].queries), 1) def test_count(self): more_samples = [] for i in range(1, 50): mock = MockModel() mock.id = i mock.author = 'daniel%s' % i mock.pub_date = date(2009, 2, 25) - timedelta(days=i) more_samples.append(mock) self.sb.update(self.wmmi, more_samples) reset_search_queries() self.assertEqual(len(connections['whoosh'].queries), 0) results = self.sqs.all() self.assertEqual(len(results), 49) self.assertEqual(results._cache_is_full(), False) self.assertEqual(len(connections['whoosh'].queries), 1) def test_query_generation(self): sqs = self.sqs.filter(SQ(content=AutoQuery("hello world")) | SQ(title=AutoQuery("hello world"))) self.assertEqual(sqs.query.build_query(), u"((hello world) OR title:(hello world))") def test_result_class(self): self.sb.update(self.wmmi, self.sample_objs) # Assert that we're defaulting to ``SearchResult``. sqs = self.sqs.all() self.assertTrue(isinstance(sqs[0], SearchResult)) # Custom class. sqs = self.sqs.result_class(MockSearchResult).all() self.assertTrue(isinstance(sqs[0], MockSearchResult)) # Reset to default. sqs = self.sqs.result_class(None).all() self.assertTrue(isinstance(sqs[0], SearchResult)) class LiveWhooshMultiSearchQuerySetTestCase(WhooshTestCase): fixtures = ['bulk_data.json'] def setUp(self): super(LiveWhooshMultiSearchQuerySetTestCase, self).setUp() # Stow. 
self.old_ui = connections['whoosh'].get_unified_index() self.ui = UnifiedIndex() self.wmmi = WhooshMockSearchIndex() self.wamsi = WhooshAnotherMockSearchIndex() self.ui.build(indexes=[self.wmmi, self.wamsi]) self.sb = connections['whoosh'].get_backend() connections['whoosh']._index = self.ui self.sb.setup() self.raw_whoosh = self.sb.index self.parser = QueryParser(self.sb.content_field_name, schema=self.sb.schema) self.sb.delete_index() self.wmmi.update(using='whoosh') self.wamsi.update(using='whoosh') self.sqs = SearchQuerySet('whoosh') def tearDown(self): connections['whoosh']._index = self.old_ui super(LiveWhooshMultiSearchQuerySetTestCase, self).tearDown() def test_searchquerysets_with_models(self): sqs = self.sqs.all() self.assertEqual(sqs.query.build_query(), u'*') self.assertEqual(len(sqs), 25) sqs = self.sqs.models(MockModel) self.assertEqual(sqs.query.build_query(), u'*') self.assertEqual(len(sqs), 23) sqs = self.sqs.models(AnotherMockModel) self.assertEqual(sqs.query.build_query(), u'*') self.assertEqual(len(sqs), 2) class LiveWhooshMoreLikeThisTestCase(WhooshTestCase): fixtures = ['bulk_data.json'] def setUp(self): super(LiveWhooshMoreLikeThisTestCase, self).setUp() # Stow. self.old_ui = connections['whoosh'].get_unified_index() self.ui = UnifiedIndex() self.wmmi = WhooshMockSearchIndex() self.wamsi = WhooshAnotherMockSearchIndex() self.ui.build(indexes=[self.wmmi, self.wamsi]) self.sb = connections['whoosh'].get_backend() connections['whoosh']._index = self.ui self.sb.setup() self.raw_whoosh = self.sb.index self.parser = QueryParser(self.sb.content_field_name, schema=self.sb.schema) self.sb.delete_index() self.wmmi.update() self.wamsi.update() self.sqs = SearchQuerySet('whoosh') def tearDown(self): connections['whoosh']._index = self.old_ui super(LiveWhooshMoreLikeThisTestCase, self).tearDown() # We expect failure here because, despite not changing the code, Whoosh # 2.5.1 returns incorrect counts/results. Huzzah. 
@unittest.expectedFailure def test_more_like_this(self): mlt = self.sqs.more_like_this(MockModel.objects.get(pk=22)) self.assertEqual(mlt.count(), 22) self.assertEqual(sorted([result.pk for result in mlt]), sorted([u'9', u'8', u'7', u'6', u'5', u'4', u'3', u'2', u'1', u'21', u'20', u'19', u'18', u'17', u'16', u'15', u'14', u'13', u'12', u'11', u'10', u'23'])) self.assertEqual(len([result.pk for result in mlt]), 22) alt_mlt = self.sqs.filter(name='daniel3').more_like_this(MockModel.objects.get(pk=13)) self.assertEqual(alt_mlt.count(), 8) self.assertEqual(sorted([result.pk for result in alt_mlt]), sorted([u'4', u'3', u'22', u'19', u'17', u'16', u'10', u'23'])) self.assertEqual(len([result.pk for result in alt_mlt]), 8) alt_mlt_with_models = self.sqs.models(MockModel).more_like_this(MockModel.objects.get(pk=11)) self.assertEqual(alt_mlt_with_models.count(), 22) self.assertEqual(sorted([result.pk for result in alt_mlt_with_models]), sorted([u'9', u'8', u'7', u'6', u'5', u'4', u'3', u'2', u'1', u'22', u'21', u'20', u'19', u'18', u'17', u'16', u'15', u'14', u'13', u'12', u'10', u'23'])) self.assertEqual(len([result.pk for result in alt_mlt_with_models]), 22) if hasattr(MockModel.objects, 'defer'): # Make sure MLT works with deferred bits. mi = MockModel.objects.defer('foo').get(pk=22) deferred = self.sqs.models(MockModel).more_like_this(mi) self.assertEqual(deferred.count(), 22) self.assertEqual(sorted([result.pk for result in deferred]), sorted([u'9', u'8', u'7', u'6', u'5', u'4', u'3', u'2', u'1', u'21', u'20', u'19', u'18', u'17', u'16', u'15', u'14', u'13', u'12', u'11', u'10', u'23'])) self.assertEqual(len([result.pk for result in deferred]), 22) # Ensure that swapping the ``result_class`` works. 
self.assertTrue(isinstance(self.sqs.result_class(MockSearchResult).more_like_this(MockModel.objects.get(pk=21))[0], MockSearchResult)) @override_settings(DEBUG=True) class LiveWhooshAutocompleteTestCase(WhooshTestCase): fixtures = ['bulk_data.json'] def setUp(self): super(LiveWhooshAutocompleteTestCase, self).setUp() # Stow. self.old_ui = connections['whoosh'].get_unified_index() self.ui = UnifiedIndex() self.wacsi = WhooshAutocompleteMockModelSearchIndex() self.ui.build(indexes=[self.wacsi]) self.sb = connections['whoosh'].get_backend() connections['whoosh']._index = self.ui # Stow. import haystack self.sb.setup() self.sqs = SearchQuerySet('whoosh') # Wipe it clean. self.sqs.query.backend.clear() self.wacsi.update(using='whoosh') def tearDown(self): connections['whoosh']._index = self.old_ui super(LiveWhooshAutocompleteTestCase, self).tearDown() def test_autocomplete(self): autocomplete = self.sqs.autocomplete(text_auto='mod') self.assertEqual(autocomplete.count(), 5) self.assertEqual([result.pk for result in autocomplete], [u'1', u'12', u'6', u'7', u'14']) self.assertTrue('mod' in autocomplete[0].text.lower()) self.assertTrue('mod' in autocomplete[1].text.lower()) self.assertTrue('mod' in autocomplete[2].text.lower()) self.assertTrue('mod' in autocomplete[3].text.lower()) self.assertTrue('mod' in autocomplete[4].text.lower()) self.assertEqual(len([result.pk for result in autocomplete]), 5) def test_edgengram_regression(self): autocomplete = self.sqs.autocomplete(text_auto='ngm') self.assertEqual(autocomplete.count(), 0) def test_extra_whitespace(self): autocomplete = self.sqs.autocomplete(text_auto='mod ') self.assertEqual(autocomplete.count(), 5) class WhooshRoundTripSearchIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, default='') name = indexes.CharField() is_active = indexes.BooleanField() post_count = indexes.IntegerField() average_rating = indexes.FloatField() price = indexes.DecimalField() pub_date = 
indexes.DateField() created = indexes.DateTimeField() tags = indexes.MultiValueField() sites = indexes.MultiValueField() # For a regression involving lists with nothing in them. empty_list = indexes.MultiValueField() def get_model(self): return MockModel def prepare(self, obj): prepped = super(WhooshRoundTripSearchIndex, self).prepare(obj) prepped.update({ 'text': 'This is some example text.', 'name': 'Mister Pants', 'is_active': True, 'post_count': 25, 'average_rating': 3.6, 'price': Decimal('24.99'), 'pub_date': date(2009, 11, 21), 'created': datetime(2009, 11, 21, 21, 31, 00), 'tags': ['staff', 'outdoor', 'activist', 'scientist'], 'sites': [3, 5, 1], 'empty_list': [], }) return prepped @override_settings(DEBUG=True) class LiveWhooshRoundTripTestCase(WhooshTestCase): def setUp(self): super(LiveWhooshRoundTripTestCase, self).setUp() # Stow. self.old_ui = connections['whoosh'].get_unified_index() self.ui = UnifiedIndex() self.wrtsi = WhooshRoundTripSearchIndex() self.ui.build(indexes=[self.wrtsi]) self.sb = connections['whoosh'].get_backend() connections['whoosh']._index = self.ui self.sb.setup() self.raw_whoosh = self.sb.index self.parser = QueryParser(self.sb.content_field_name, schema=self.sb.schema) self.sb.delete_index() self.sqs = SearchQuerySet('whoosh') # Wipe it clean. self.sqs.query.backend.clear() # Fake indexing. mock = MockModel() mock.id = 1 self.sb.update(self.wrtsi, [mock]) def tearDown(self): super(LiveWhooshRoundTripTestCase, self).tearDown() def test_round_trip(self): results = self.sqs.filter(id='core.mockmodel.1') # Sanity check. self.assertEqual(results.count(), 1) # Check the individual fields. 
result = results[0] self.assertEqual(result.id, 'core.mockmodel.1') self.assertEqual(result.text, 'This is some example text.') self.assertEqual(result.name, 'Mister Pants') self.assertEqual(result.is_active, True) self.assertEqual(result.post_count, 25) self.assertEqual(result.average_rating, 3.6) self.assertEqual(result.price, u'24.99') self.assertEqual(result.pub_date, datetime(2009, 11, 21, 0, 0)) self.assertEqual(result.created, datetime(2009, 11, 21, 21, 31, 00)) self.assertEqual(result.tags, ['staff', 'outdoor', 'activist', 'scientist']) self.assertEqual(result.sites, [u'3', u'5', u'1']) self.assertEqual(result.empty_list, []) # Check boolean filtering... results = self.sqs.filter(id='core.mockmodel.1', is_active=True) self.assertEqual(results.count(), 1) @override_settings(DEBUG=True) class LiveWhooshRamStorageTestCase(TestCase): def setUp(self): super(LiveWhooshRamStorageTestCase, self).setUp() # Stow. self.old_whoosh_storage = settings.HAYSTACK_CONNECTIONS['whoosh'].get('STORAGE', 'file') settings.HAYSTACK_CONNECTIONS['whoosh']['STORAGE'] = 'ram' self.old_ui = connections['whoosh'].get_unified_index() self.ui = UnifiedIndex() self.wrtsi = WhooshRoundTripSearchIndex() self.ui.build(indexes=[self.wrtsi]) self.sb = connections['whoosh'].get_backend() connections['whoosh']._index = self.ui # Stow. import haystack self.sb.setup() self.raw_whoosh = self.sb.index self.parser = QueryParser(self.sb.content_field_name, schema=self.sb.schema) self.sqs = SearchQuerySet('whoosh') # Wipe it clean. self.sqs.query.backend.clear() # Fake indexing. mock = MockModel() mock.id = 1 self.sb.update(self.wrtsi, [mock]) def tearDown(self): self.sqs.query.backend.clear() settings.HAYSTACK_CONNECTIONS['whoosh']['STORAGE'] = self.old_whoosh_storage connections['whoosh']._index = self.old_ui super(LiveWhooshRamStorageTestCase, self).tearDown() def test_ram_storage(self): results = self.sqs.filter(id='core.mockmodel.1') # Sanity check. 
self.assertEqual(results.count(), 1) # Check the individual fields. result = results[0] self.assertEqual(result.id, 'core.mockmodel.1') self.assertEqual(result.text, 'This is some example text.') self.assertEqual(result.name, 'Mister Pants') self.assertEqual(result.is_active, True) self.assertEqual(result.post_count, 25) self.assertEqual(result.average_rating, 3.6) self.assertEqual(result.pub_date, datetime(2009, 11, 21, 0, 0)) self.assertEqual(result.created, datetime(2009, 11, 21, 21, 31, 00)) self.assertEqual(result.tags, ['staff', 'outdoor', 'activist', 'scientist']) self.assertEqual(result.sites, [u'3', u'5', u'1']) self.assertEqual(result.empty_list, []) django-haystack-2.8.0/test_haystack/whoosh_tests/test_whoosh_query.py000066400000000000000000000164111325051407000263340ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import datetime from haystack import connections from haystack.inputs import Exact from haystack.models import SearchResult from haystack.query import SearchQuerySet, SQ from ..core.models import AnotherMockModel, MockModel from .testcases import WhooshTestCase class WhooshSearchQueryTestCase(WhooshTestCase): def setUp(self): super(WhooshSearchQueryTestCase, self).setUp() self.sq = connections['whoosh'].get_query() def test_build_query_all(self): self.assertEqual(self.sq.build_query(), '*') def test_build_query_single_word(self): self.sq.add_filter(SQ(content='hello')) self.assertEqual(self.sq.build_query(), '(hello)') def test_build_query_multiple_words_and(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_filter(SQ(content='world')) self.assertEqual(self.sq.build_query(), u'((hello) AND (world))') def test_build_query_multiple_words_not(self): self.sq.add_filter(~SQ(content='hello')) self.sq.add_filter(~SQ(content='world')) self.assertEqual(self.sq.build_query(), u'(NOT ((hello)) AND NOT ((world)))') def test_build_query_multiple_words_or(self): 
self.sq.add_filter(SQ(content='hello') | SQ(content='world')) self.assertEqual(self.sq.build_query(), u'((hello) OR (world))') def test_build_query_multiple_words_mixed(self): self.sq.add_filter(SQ(content='why') | SQ(content='hello')) self.sq.add_filter(~SQ(content='world')) self.assertEqual(self.sq.build_query(), u'(((why) OR (hello)) AND NOT ((world)))') def test_build_query_phrase(self): self.sq.add_filter(SQ(content='hello world')) self.assertEqual(self.sq.build_query(), u'(hello AND world)') self.sq.add_filter(SQ(content__exact='hello world')) self.assertEqual(self.sq.build_query(), u'((hello AND world) AND ("hello world"))') def test_build_query_boost(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_boost('world', 5) self.assertEqual(self.sq.build_query(), "(hello) world^5") def test_correct_exact(self): self.sq.add_filter(SQ(content=Exact('hello world'))) self.assertEqual(self.sq.build_query(), '("hello world")') def test_build_query_multiple_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__lte=datetime.datetime(2009, 2, 10, 1, 59))) self.sq.add_filter(SQ(author__gt='daniel')) self.sq.add_filter(SQ(created__lt=datetime.datetime(2009, 2, 12, 12, 13))) self.sq.add_filter(SQ(title__gte='B')) self.sq.add_filter(SQ(id__in=[1, 2, 3])) self.sq.add_filter(SQ(rating__range=[3, 5])) self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:([to 20090210015900]) AND author:({daniel to}) AND created:({to 20090212121300}) AND title:([B to]) AND id:(1 OR 2 OR 3) AND rating:([3 to 5]))') def test_build_query_in_filter_multiple_words(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=["A Famous Paper", "An Infamous Article"])) self.assertEqual(self.sq.build_query(), u'((why) AND title:("A Famous Paper" OR "An Infamous Article"))') def test_build_query_in_filter_datetime(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(pub_date__in=[datetime.datetime(2009, 7, 6, 1, 56, 21)])) 
self.assertEqual(self.sq.build_query(), u'((why) AND pub_date:(20090706015621))') def test_build_query_in_with_set(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=set(["A Famous Paper", "An Infamous Article"]))) query = self.sq.build_query() self.assertTrue(u'(why)' in query) # Because ordering in Py3 is now random. if 'title:("A ' in query: self.assertTrue(u'title:("A Famous Paper" OR "An Infamous Article")' in query) else: self.assertTrue(u'title:("An Infamous Article" OR "A Famous Paper")' in query) def test_build_query_wildcard_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__startswith='haystack')) self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack*))') def test_build_query_fuzzy_filter_types(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__fuzzy='haystack')) self.assertEqual(self.sq.build_query(), u'((why) AND title:(haystack~))') def test_build_query_with_contains(self): self.sq.add_filter(SQ(content='circular')) self.sq.add_filter(SQ(title__contains='haystack')) self.assertEqual(self.sq.build_query(), u'((circular) AND title:(*haystack*))') def test_build_query_with_endswith(self): self.sq.add_filter(SQ(content='circular')) self.sq.add_filter(SQ(title__endswith='haystack')) self.assertEqual(self.sq.build_query(), u'((circular) AND title:(*haystack))') def test_clean(self): self.assertEqual(self.sq.clean('hello world'), 'hello world') self.assertEqual(self.sq.clean('hello AND world'), 'hello and world') self.assertEqual(self.sq.clean('hello AND OR NOT TO + - && || ! ( ) { } [ ] ^ " ~ * ? 
: \ world'), 'hello and or not to \'+\' \'-\' \'&&\' \'||\' \'!\' \'(\' \')\' \'{\' \'}\' \'[\' \']\' \'^\' \'"\' \'~\' \'*\' \'?\' \':\' \'\\\' world') self.assertEqual(self.sq.clean('so please NOTe i am in a bAND and bORed'), 'so please NOTe i am in a bAND and bORed') def test_build_query_with_models(self): self.sq.add_filter(SQ(content='hello')) self.sq.add_model(MockModel) self.assertEqual(self.sq.build_query(), '(hello)') self.sq.add_model(AnotherMockModel) self.assertEqual(self.sq.build_query(), u'(hello)') def test_build_query_with_datetime(self): self.sq.add_filter(SQ(pub_date=datetime.datetime(2009, 5, 9, 16, 20))) self.assertEqual(self.sq.build_query(), u'pub_date:(20090509162000)') def test_build_query_with_sequence_and_filter_not_in(self): self.sq.add_filter(SQ(id=[1, 2, 3])) self.assertEqual(self.sq.build_query(), u'id:(1,2,3)') def test_set_result_class(self): # Assert that we're defaulting to ``SearchResult``. self.assertTrue(issubclass(self.sq.result_class, SearchResult)) # Custom class. class IttyBittyResult(object): pass self.sq.set_result_class(IttyBittyResult) self.assertTrue(issubclass(self.sq.result_class, IttyBittyResult)) # Reset to default. 
self.sq.set_result_class(None) self.assertTrue(issubclass(self.sq.result_class, SearchResult)) def test_in_filter_values_list(self): self.sq.add_filter(SQ(content='why')) self.sq.add_filter(SQ(title__in=MockModel.objects.values_list('id', flat=True))) self.assertEqual(self.sq.build_query(), u'((why) AND title:(1 OR 2 OR 3))') def test_narrow_sq(self): sqs = SearchQuerySet(using='whoosh').narrow(SQ(foo='moof')) self.assertTrue(isinstance(sqs, SearchQuerySet)) self.assertEqual(len(sqs.query.narrow_queries), 1) self.assertEqual(sqs.query.narrow_queries.pop(), 'foo:(moof)') django-haystack-2.8.0/test_haystack/whoosh_tests/testcases.py000066400000000000000000000025061325051407000245370ustar00rootroot00000000000000# encoding: utf-8 from __future__ import absolute_import, division, print_function, unicode_literals import os import shutil from django.conf import settings from django.test import TestCase class WhooshTestCase(TestCase): fixtures = ['base_data'] @classmethod def setUpClass(cls): for name, conn_settings in settings.HAYSTACK_CONNECTIONS.items(): if conn_settings['ENGINE'] != 'haystack.backends.whoosh_backend.WhooshEngine': continue if 'STORAGE' in conn_settings and conn_settings['STORAGE'] != 'file': continue # Start clean if os.path.exists(conn_settings['PATH']): shutil.rmtree(conn_settings['PATH']) from haystack import connections connections[name].get_backend().setup() super(WhooshTestCase, cls).setUpClass() @classmethod def tearDownClass(cls): for conn in settings.HAYSTACK_CONNECTIONS.values(): if conn['ENGINE'] != 'haystack.backends.whoosh_backend.WhooshEngine': continue if 'STORAGE' in conn and conn['STORAGE'] != 'file': continue # Start clean if os.path.exists(conn['PATH']): shutil.rmtree(conn['PATH']) super(WhooshTestCase, cls).tearDownClass() django-haystack-2.8.0/tox.ini000066400000000000000000000063661325051407000161130ustar00rootroot00000000000000[tox] envlist = docs, py27-django1.11-es1.x, py34-django1.11-es1.x, py34-django2.0-es1.x, 
py35-django1.11-es1.x, py35-django2.0-es1.x, pypy-django1.11-es1.x, pypy-django2.0-es1.x, py27-django1.11-es2.x, py34-django1.11-es2.x, py34-django2.0-es2.x, py35-django1.11-es2.x, py35-django2.0-es2.x, py36-django1.11-es2.x, py36-django2.0-es2.x, pypy-django1.11-es2.x, pypy-django2.0-es2.x, [base] deps = requests [django2.0] deps = Django>=2.0,<2.1 [django1.11] deps = Django>=1.11,<2.0 [es2.x] deps = elasticsearch>=2.0.0,<3.0.0 [es1.x] deps = elasticsearch>=1.0.0,<2.0.0 [testenv] commands = python test_haystack/solr_tests/server/wait-for-solr python {toxinidir}/setup.py test [testenv:pypy-django1.11-es1.x] setenv = VERSION_ES=>=1.0.0,<2.0.0 deps = {[es1.x]deps} {[django1.11]deps} {[base]deps} [testenv:pypy-django2.0-es1.x] setenv = VERSION_ES=>=1.0.0,<2.0.0 deps = {[es1.x]deps} {[django2.0]deps} {[base]deps} [testenv:py27-django1.11-es1.x] basepython = python2.7 setenv = VERSION_ES=>=1.0.0,<2.0.0 deps = {[es1.x]deps} {[django1.11]deps} {[base]deps} [testenv:py34-django1.11-es1.x] basepython = python3.4 setenv = VERSION_ES=>=1.0.0,<2.0.0 deps = {[django1.11]deps} {[base]deps} [testenv:py34-django2.0-es1.x] basepython = python3.4 setenv = VERSION_ES=>=1.0.0,<2.0.0 deps = {[django2.0]deps} {[base]deps} [testenv:py35-django1.11-es1.x] basepython = python3.5 setenv = VERSION_ES=>=1.0.0,<2.0.0 deps = {[es1.x]deps} {[django1.11]deps} {[base]deps} [testenv:py35-django2.0-es1.x] basepython = python3.5 setenv = VERSION_ES=>=1.0.0,<2.0.0 deps = {[es1.x]deps} {[django2.0]deps} {[base]deps} [testenv:pypy-django1.11-es2.x] setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = {[es2.x]deps} {[django1.11]deps} {[base]deps} [testenv:pypy-django2.0-es2.x] setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = {[es2.x]deps} {[django2.0]deps} {[base]deps} [testenv:py27-django1.11-es2.x] basepython = python2.7 setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = {[es2.x]deps} {[django1.11]deps} {[base]deps} [testenv:py34-django1.11-es2.x] basepython = python3.4 setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = {[es2.x]deps} 
{[django1.11]deps} {[base]deps} [testenv:py34-django2.0-es2.x] basepython = python3.4 setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = {[es2.x]deps} {[django2.0]deps} {[base]deps} [testenv:py35-django1.11-es2.x] basepython = python3.5 setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = {[es2.x]deps} {[django1.11]deps} {[base]deps} [testenv:py35-django2.0-es2.x] basepython = python3.5 setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = {[es2.x]deps} {[django2.0]deps} {[base]deps} [testenv:py36-django1.11-es2.x] basepython = python3.6 setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = {[es2.x]deps} {[django1.11]deps} {[base]deps} [testenv:py36-django2.0-es2.x] basepython = python3.6 setenv = VERSION_ES=>=2.0.0,<3.0.0 deps = {[es2.x]deps} {[django2.0]deps} {[base]deps} [testenv:docs] changedir = docs deps = sphinx commands = sphinx-build -W -b html -d {envtmpdir}/doctrees . {envtmpdir}/html