author     Michael Fladischer <fladi@debian.org>    2015-07-07 20:12:20 (GMT)
committer  Michael Fladischer <fladi@debian.org>    2015-07-07 20:12:20 (GMT)
commit     48a63b751424fce6737dd9a9f8b7e7a409e5720d (patch)
tree       a8549454d8e119a51e658185328b43d6c87f82c1
django-haystack (2.4.0-2) unstable; urgency=low    (HEAD, master)

  * Switch buildsystem to pybuild.
  * Add Python3 support through a separate package.
  * Add lintian override for missing upstream changelog.

  # imported from the archive
-rw-r--r--  AUTHORS  106
-rw-r--r--  LICENSE  31
-rw-r--r--  MANIFEST.in  5
-rw-r--r--  PKG-INFO  79
-rw-r--r--  README.rst  59
-rw-r--r--  debian/changelog  44
-rw-r--r--  debian/clean  6
-rw-r--r--  debian/compat  1
-rw-r--r--  debian/control  71
-rw-r--r--  debian/copyright  55
-rw-r--r--  debian/python-django-haystack-doc.doc-base  8
-rw-r--r--  debian/python-django-haystack-doc.docs  1
-rw-r--r--  debian/python-django-haystack-doc.lintian-overrides  2
-rw-r--r--  debian/python-django-haystack.docs  1
-rw-r--r--  debian/python-django-haystack.lintian-overrides  2
-rw-r--r--  debian/python3-django-haystack.docs  1
-rw-r--r--  debian/python3-django-haystack.lintian-overrides  2
-rwxr-xr-x  debian/rules  15
-rw-r--r--  debian/source/format  1
-rw-r--r--  debian/source/lintian-overrides  2
-rw-r--r--  debian/watch  3
-rw-r--r--  django_haystack.egg-info/PKG-INFO  79
-rw-r--r--  django_haystack.egg-info/SOURCES.txt  99
-rw-r--r--  django_haystack.egg-info/dependency_links.txt  1
-rw-r--r--  django_haystack.egg-info/not-zip-safe  1
-rw-r--r--  django_haystack.egg-info/pbr.json  1
-rw-r--r--  django_haystack.egg-info/requires.txt  1
-rw-r--r--  django_haystack.egg-info/top_level.txt  1
-rw-r--r--  docs/Makefile  80
-rw-r--r--  docs/_build/.gitignore  0
-rw-r--r--  docs/_static/.gitignore  0
-rw-r--r--  docs/_templates/.gitignore  0
-rw-r--r--  docs/admin.rst  47
-rw-r--r--  docs/architecture_overview.rst  66
-rw-r--r--  docs/autocomplete.rst  220
-rw-r--r--  docs/backend_support.rst  127
-rw-r--r--  docs/best_practices.rst  263
-rw-r--r--  docs/boost.rst  123
-rw-r--r--  docs/conf.py  207
-rw-r--r--  docs/contributing.rst  132
-rw-r--r--  docs/creating_new_backends.rst  34
-rw-r--r--  docs/debugging.rst  107
-rw-r--r--  docs/faceting.rst  328
-rw-r--r--  docs/faq.rst  117
-rw-r--r--  docs/glossary.rst  76
-rw-r--r--  docs/haystack_theme/layout.html  22
-rw-r--r--  docs/haystack_theme/static/documentation.css  29
-rw-r--r--  docs/haystack_theme/theme.conf  2
-rw-r--r--  docs/highlighting.rst  77
-rw-r--r--  docs/index.rst  117
-rw-r--r--  docs/inputtypes.rst  177
-rw-r--r--  docs/installing_search_engines.rst  222
-rw-r--r--  docs/management_commands.rst  201
-rw-r--r--  docs/migration_from_1_to_2.rst  285
-rw-r--r--  docs/multiple_index.rst  201
-rw-r--r--  docs/other_apps.rst  98
-rw-r--r--  docs/python3.rst  47
-rw-r--r--  docs/rich_content_extraction.rst  68
-rw-r--r--  docs/running_tests.rst  70
-rw-r--r--  docs/searchbackend_api.rst  124
-rw-r--r--  docs/searchfield_api.rst  262
-rw-r--r--  docs/searchindex_api.rst  618
-rw-r--r--  docs/searchquery_api.rst  336
-rw-r--r--  docs/searchqueryset_api.rst  893
-rw-r--r--  docs/searchresult_api.rst  62
-rw-r--r--  docs/settings.rst  289
-rw-r--r--  docs/signal_processors.rst  117
-rw-r--r--  docs/spatial.rst  412
-rw-r--r--  docs/templatetags.rst  68
-rw-r--r--  docs/toc.rst  53
-rw-r--r--  docs/tutorial.rst  398
-rw-r--r--  docs/utils.rst  18
-rw-r--r--  docs/views_and_forms.rst  408
-rw-r--r--  docs/who_uses.rst  357
-rw-r--r--  haystack/__init__.py  71
-rw-r--r--  haystack/admin.py  163
-rw-r--r--  haystack/backends/__init__.py  1041
-rw-r--r--  haystack/backends/elasticsearch_backend.py  944
-rw-r--r--  haystack/backends/simple_backend.py  135
-rw-r--r--  haystack/backends/solr_backend.py  718
-rw-r--r--  haystack/backends/whoosh_backend.py  916
-rw-r--r--  haystack/constants.py  33
-rw-r--r--  haystack/exceptions.py  53
-rw-r--r--  haystack/fields.py  441
-rw-r--r--  haystack/forms.py  133
-rw-r--r--  haystack/generic_views.py  126
-rw-r--r--  haystack/indexes.py  497
-rw-r--r--  haystack/inputs.py  159
-rw-r--r--  haystack/management/__init__.py  0
-rw-r--r--  haystack/management/commands/__init__.py  0
-rw-r--r--  haystack/management/commands/build_solr_schema.py  70
-rw-r--r--  haystack/management/commands/clear_index.py  59
-rw-r--r--  haystack/management/commands/haystack_info.py  21
-rw-r--r--  haystack/management/commands/rebuild_index.py  26
-rwxr-xr-x  haystack/management/commands/update_index.py  289
-rw-r--r--  haystack/manager.py  107
-rw-r--r--  haystack/models.py  247
-rw-r--r--  haystack/panels.py  86
-rw-r--r--  haystack/query.py  841
-rw-r--r--  haystack/routers.py  18
-rw-r--r--  haystack/signals.py  90
-rw-r--r--  haystack/templates/panels/haystack.html  33
-rw-r--r--  haystack/templates/search_configuration/solr.xml  166
-rw-r--r--  haystack/templatetags/__init__.py  0
-rw-r--r--  haystack/templatetags/highlight.py  119
-rw-r--r--  haystack/templatetags/more_like_this.py  108
-rw-r--r--  haystack/urls.py  16
-rw-r--r--  haystack/utils/__init__.py  88
-rwxr-xr-x  haystack/utils/app_loading.py  90
-rw-r--r--  haystack/utils/geo.py  78
-rw-r--r--  haystack/utils/highlighting.py  165
-rw-r--r--  haystack/utils/loading.py  334
-rw-r--r--  haystack/utils/log.py  25
-rw-r--r--  haystack/views.py  235
-rw-r--r--  setup.cfg  22
-rwxr-xr-x  setup.py  68
116 files changed, 17467 insertions, 0 deletions
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..a40e7a7
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,106 @@
+Primary Authors:
+
+ * Daniel Lindsley
+ * Matt Croydon (some documentation, sanity checks and the sweet name)
+ * Travis Cline (the original SQ implementation, improvements to ModelSearchIndex)
+ * David Sauve (notanumber) for the Xapian backend, the simple backend and various patches.
+ * Jannis Leidel (jezdez)
+ * Chris Adams (acdha)
+ * Justin Caratzas (bigjust)
+ * Andrew Schoen (andrewschoen)
+ * Dan Watson (dcwatson)
+ * Matt Woodward (mpwoodward)
+ * Alex Vidal (avidal)
+ * Zach Smith (zmsmith)
+ * Stefan Wehrmeyer (stefanw)
+ * George Hickman (ghickman)
+ * Ben Spaulding (benspaulding)
+
+
+Thanks to
+ * Jacob Kaplan-Moss & Joseph Kocherhans for the original implementation of
+ djangosearch, of which portions were used, as well as basic API feedback.
+ * Christian Metts for designing the logo and building a better site.
+ * Nathan Borror for testing and advanced form usage.
+ * Malcolm Tredinnick for API feedback.
+ * Mediaphormedia for funding the development on More Like This and faceting.
+ * Travis Cline for API feedback, Git help and improvements to the reindex command.
+ * Brian Rosner for various patches.
+ * Richard Boulton for feedback and suggestions.
+ * Cyberdelia for feedback and patches.
+ * Ask Solem for patching the setup.py.
+ * Ben Spaulding for feedback and documentation patches.
+ * smulloni for various patches.
+ * JoeGermuska for various patches.
+ * SmileyChris for various patches.
+ * sk1p for various patches.
+ * Ryszard Szopa (ryszard) for various patches.
+ * Patryk Zawadzki (patrys) for various patches and feedback.
+ * Frank Wiles for documentation patches.
+ * Chris Adams (acdha) for various patches.
+ * Kyle MacFarlane for various patches.
+ * Alex Gaynor (alex) for help with handling deferred models with More Like This.
+ * RobertGawron for a patch to the Highlighter.
+ * Simon Willison (simonw) for various proposals and patches.
+ * Ben Firshman (bfirsh) for faceting improvements and suggestions.
+ * Peter Bengtsson for a patch regarding passing a customized site.
+ * Sam Bull (osirius) for a patch regarding initial data on SearchForms.
+ * slai for a patch regarding Whoosh and fetching all documents of a certain model type.
+ * alanwj for a patch regarding Whoosh and empty MultiValueFields.
+ * alanzoppa for a patch regarding highlighting.
+ * piquadrat for a patch regarding the more_like_this template tag.
+ * dedsm for a patch regarding the pickling of SearchResult objects.
+ * EmilStenstrom for a patch to the Highlighter.
+ * symroe for a patch regarding the more_like_this template tag.
+ * ghostrocket for a patch regarding the simple backend.
+ * Rob Hudson (robhudson) for improvements to the admin search.
+ * apollo13 for simplifying ``SearchForm.__init__``.
+ * Carl Meyer (carljm) for a patch regarding character primary keys.
+ * oyiptong for a patch regarding pickling.
+ * alfredo for a patch to generate epub docs.
+ * Luke Hatcher (lukeman) for documentation patches.
+ * Trey Hunner (treyhunner) for a Whoosh field boosting patch.
+ * Kent Gormat of Retail Catalyst for funding the development of multiple index support.
+ * Gidsy for funding the initial geospatial implementation
+ * CMGdigital for funding the development on:
+ * a multiprocessing-enabled version of ``update_index``.
+ * the addition of ``--start/--end`` options in ``update_index``.
+ * the ability to specify both apps & models to ``update_index``.
+ * A significant portion of the geospatial feature.
+ * A significant portion of the input types feature.
+ * Aram Dulyan (Aramgutang) for fixing the included admin class to be Django 1.4 compatible.
+ * Honza Kral (HonzaKral) for various Elasticsearch tweaks & testing.
+ * Alex Vidal (avidal) for a patch allowing developers to override the queryset used for update operations.
+ * Igor Támara (ikks) for a patch related to Unicode ``verbose_name_plural``.
+ * Dan Helfman (witten) for a patch related to highlighting.
+ * Matt DeBoard for refactor of ``SolrSearchBackend.search`` method to allow simpler extension of the class.
+ * Rodrigo Guzman (rz) for a fix to query handling in the ``simple`` backend.
+ * Martin J. Laubach (mjl) for fixing the logic used when combining querysets
+ * Eric Holscher (ericholscher) for a docs fix.
+ * Erik Rose (erikrose) for a quick pyelasticsearch-compatibility patch
+ * Stefan Wehrmeyer (stefanw) for a simple search filter fix
+ * Dan Watson (dcwatson) for various patches.
+ * Andrew Schoen (andrewschoen) for the addition of ``HAYSTACK_IDENTIFIER_METHOD``
+ * Pablo SEMINARIO (pabluk) for a docs fix, and a fix in the ElasticSearch backend.
+ * Eric Thurgood (ethurgood) for an import fix in the Elasticsearch backend.
+ * Revolution Systems & The Python Software Foundation for funding a significant portion of the port to Python 3!
+ * Artem Kostiuk (postatum) for a patch allowing searching for the slash character in ElasticSearch since Lucene 4.0.
+ * Luis Barrueco (luisbarrueco) for a simple fix regarding updating indexes using multiple backends.
+ * Szymon Teżewski (jasisz) for an update to the bounding-box calculation for spatial queries
+ * Chris Wilson (qris) and Orlando Fiol (overflow) for an update allowing the use of multiple order_by()
+ fields with Whoosh as long as they share a consistent sort direction
+ * Steven Skoczen (@skoczen) for an ElasticSearch bug fix
+ * @Xaroth for updating the app loader to be compatible with Django 1.7
+ * Jaroslav Gorjatsev (jarig) for a bugfix with index_fieldname
+ * Dirk Eschler (@deschler) for app loader Django 1.7 compatibility fixes
+ * Wictor (wicol) for a patch improving the error message given when model_attr references a non-existent
+ field
+ * Pierre Dulac (dulaccc) for a patch updating distance filters for ElasticSearch 1.x
+ * Andrei Fokau (andreif) for adding support for ``SQ`` in ``SearchQuerySet.narrow()``
+ * Phill Tornroth (phill-tornroth) for several patches improving UnifiedIndex and ElasticSearch support
+ * Philippe Luickx (philippeluickx) for documenting how to provide backend-specific facet options
+ * Felipe Prenholato (@chronossc) for a patch making it easy to exclude documents from indexing using custom logic
+ * Alfredo Armanini (@phingage) for a patch fixing compatibility with database API changes in Django 1.8
+ * Ben Spaulding (@benspaulding) for many updates for Django 1.8 support
+ * Troy Grosfield (@troygrosfield) for fixing the test runner for Django 1.8
+ * Ilan Steemers (@Koed00) for fixing Django 1.9 deprecation warnings
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0bb702e
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,31 @@
+Copyright (c) 2009-2013, Daniel Lindsley.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. Neither the name of Haystack nor the names of its contributors may be used
+ to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+---
+
+Prior to April 17, 2009, this software was released under the MIT license.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..ac95f3f
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,5 @@
+recursive-include docs *
+recursive-include haystack/templates *.xml *.html
+include AUTHORS
+include LICENSE
+include README.rst
diff --git a/PKG-INFO b/PKG-INFO
new file mode 100644
index 0000000..6c444ac
--- /dev/null
+++ b/PKG-INFO
@@ -0,0 +1,79 @@
+Metadata-Version: 1.1
+Name: django-haystack
+Version: 2.4.0
+Summary: Pluggable search for Django.
+Home-page: http://haystacksearch.org/
+Author: Daniel Lindsley
+Author-email: daniel@toastdriven.com
+License: UNKNOWN
+Description: ========
+ Haystack
+ ========
+
+ :author: Daniel Lindsley
+ :date: 2013/07/28
+
+ Haystack provides modular search for Django. It features a unified, familiar
+ API that allows you to plug in different search backends (such as Solr_,
+ Elasticsearch_, Whoosh_, Xapian_, etc.) without having to modify your code.
+
+ .. _Solr: http://lucene.apache.org/solr/
+ .. _Elasticsearch: http://elasticsearch.org/
+ .. _Whoosh: https://bitbucket.org/mchaput/whoosh/
+ .. _Xapian: http://xapian.org/
+
+ Haystack is BSD licensed, plays nicely with third-party apps without needing to
+ modify the source and supports advanced features like faceting, More Like This,
+ highlighting, spatial search and spelling suggestions.
+
+ You can find more information at http://haystacksearch.org/.
+
+
+ Getting Help
+ ============
+
+ There is a mailing list (http://groups.google.com/group/django-haystack/)
+ available for general discussion and an IRC channel (#haystack on
+ irc.freenode.net).
+
+
+ Documentation
+ =============
+
+ * Development version: http://docs.haystacksearch.org/
+ * v2.3.X: http://django-haystack.readthedocs.org/en/v2.3.0/
+ * v2.2.X: http://django-haystack.readthedocs.org/en/v2.2.0/
+ * v2.1.X: http://django-haystack.readthedocs.org/en/v2.1.0/
+ * v2.0.X: http://django-haystack.readthedocs.org/en/v2.0.0/
+ * v1.2.X: http://django-haystack.readthedocs.org/en/v1.2.7/
+ * v1.1.X: http://django-haystack.readthedocs.org/en/v1.1/
+
+ Build Status
+ ============
+
+ .. image:: https://travis-ci.org/django-haystack/django-haystack.svg?branch=master
+ :target: https://travis-ci.org/django-haystack/django-haystack
+
+ Requirements
+ ============
+
+ Haystack has a relatively easily-met set of requirements.
+
+ * Python 2.7+ or Python 3.3+
+ * Django 1.6+
+
+ Additionally, each backend has its own requirements. You should refer to
+ http://django-haystack.readthedocs.org/en/latest/installing_search_engines.html for more
+ details.
+
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Environment :: Web Environment
+Classifier: Framework :: Django
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Utilities
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..2c0e549
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,59 @@
+========
+Haystack
+========
+
+:author: Daniel Lindsley
+:date: 2013/07/28
+
+Haystack provides modular search for Django. It features a unified, familiar
+API that allows you to plug in different search backends (such as Solr_,
+Elasticsearch_, Whoosh_, Xapian_, etc.) without having to modify your code.
+
+.. _Solr: http://lucene.apache.org/solr/
+.. _Elasticsearch: http://elasticsearch.org/
+.. _Whoosh: https://bitbucket.org/mchaput/whoosh/
+.. _Xapian: http://xapian.org/
+
+Haystack is BSD licensed, plays nicely with third-party apps without needing to
+modify the source and supports advanced features like faceting, More Like This,
+highlighting, spatial search and spelling suggestions.
+
+You can find more information at http://haystacksearch.org/.
+
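+As a sketch of how little configuration the pluggable backends need (this
+example assumes the Whoosh backend and a project-level ``whoosh_index``
+directory; swap in the engine of your choice), the backend is selected in
+``settings.py``::
+
+    import os
+
+    INSTALLED_APPS = [
+        # ... your other apps ...
+        'haystack',
+    ]
+
+    HAYSTACK_CONNECTIONS = {
+        'default': {
+            'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
+            'PATH': os.path.join(os.path.dirname(__file__), 'whoosh_index'),
+        },
+    }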
+
+Getting Help
+============
+
+There is a mailing list (http://groups.google.com/group/django-haystack/)
+available for general discussion and an IRC channel (#haystack on
+irc.freenode.net).
+
+
+Documentation
+=============
+
+* Development version: http://docs.haystacksearch.org/
+* v2.3.X: http://django-haystack.readthedocs.org/en/v2.3.0/
+* v2.2.X: http://django-haystack.readthedocs.org/en/v2.2.0/
+* v2.1.X: http://django-haystack.readthedocs.org/en/v2.1.0/
+* v2.0.X: http://django-haystack.readthedocs.org/en/v2.0.0/
+* v1.2.X: http://django-haystack.readthedocs.org/en/v1.2.7/
+* v1.1.X: http://django-haystack.readthedocs.org/en/v1.1/
+
+Build Status
+============
+
+.. image:: https://travis-ci.org/django-haystack/django-haystack.svg?branch=master
+ :target: https://travis-ci.org/django-haystack/django-haystack
+
+Requirements
+============
+
+Haystack has a relatively easily-met set of requirements.
+
+* Python 2.7+ or Python 3.3+
+* Django 1.6+
+
+Additionally, each backend has its own requirements. You should refer to
+http://django-haystack.readthedocs.org/en/latest/installing_search_engines.html for more
+details.
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..5f4a725
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,44 @@
+django-haystack (2.4.0-2) unstable; urgency=low
+
+ * Switch buildsystem to pybuild.
+ * Add Python3 support through a separate package.
+ * Add lintian override for missing upstream changelog.
+
+ -- Michael Fladischer <fladi@debian.org> Tue, 07 Jul 2015 22:12:20 +0200
+
+django-haystack (2.4.0-1) unstable; urgency=low
+
+ * New upstream release.
+ * Remove files from d/copyright which are no longer shipped by
+ upstream.
+ * Use pypi.debian.net service for uscan.
+ * Change my email address to fladi@debian.org.
+
+ -- Michael Fladischer <fladi@debian.org> Tue, 07 Jul 2015 16:18:03 +0200
+
+django-haystack (2.3.1-1) unstable; urgency=medium
+
+ * New upstream release (Closes: #755599).
+ * Bump Standards-Version to 3.9.6.
+ * Disable tests as they require a live SOLR and elasticsearch server.
+ * Change file names for solr configuration files in d/copyright.
+ * Make pysolr require at least version 3.2.0.
+ * Add python-elasticsearch to Suggests.
+ * Drop packages required by tests from Build-Depends:
+ + python-django
+ + python-httplib2
+ + python-mock
+ + python-pysolr
+ + python-whoosh
+ * Drop python-xapian from suggests as the xapian backend is not
+ included.
+ * Add django_haystack.egg-info/requires.txt to d/clean.
+ * Remove empty lines at EOF for d/clean and d/rules.
+
+ -- Michael Fladischer <FladischerMichael@fladi.at> Mon, 20 Oct 2014 14:18:24 +0200
+
+django-haystack (2.1.0-1) unstable; urgency=low
+
+ * Initial release (Closes: #563311).
+
+ -- Michael Fladischer <FladischerMichael@fladi.at> Thu, 13 Mar 2014 19:11:15 +0100
diff --git a/debian/clean b/debian/clean
new file mode 100644
index 0000000..2573b6b
--- /dev/null
+++ b/debian/clean
@@ -0,0 +1,6 @@
+django_haystack.egg-info/PKG-INFO
+django_haystack.egg-info/SOURCES.txt
+django_haystack.egg-info/dependency_links.txt
+django_haystack.egg-info/not-zip-safe
+django_haystack.egg-info/top_level.txt
+django_haystack.egg-info/requires.txt
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..ec63514
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..0aa8c1d
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,71 @@
+Source: django-haystack
+Section: python
+Priority: optional
+Maintainer: Debian Python Modules Team <python-modules-team@lists.alioth.debian.org>
+Uploaders: Michael Fladischer <fladi@debian.org>
+Build-Depends: debhelper (>= 9),
+ dh-python,
+ python-all,
+ python-setuptools,
+ python-sphinx (>= 1.0.7+dfsg),
+ python3-all,
+ python3-setuptools
+Standards-Version: 3.9.6
+X-Python-Version: >= 2.6
+X-Python3-Version: >= 3.3
+Homepage: https://github.com/toastdriven/django-haystack
+Vcs-Svn: svn://anonscm.debian.org/python-modules/packages/django-haystack/trunk/
+Vcs-Browser: http://anonscm.debian.org/viewvc/python-modules/packages/django-haystack/trunk/
+
+Package: python-django-haystack
+Architecture: all
+Depends: python-django (>= 1.5),
+ ${misc:Depends},
+ ${python:Depends}
+Suggests: python-elasticsearch,
+ python-httplib2,
+ python-pysolr (>= 3.2.0),
+ python-whoosh
+Description: modular search for Django
+ Haystack provides modular search for Django. It features a unified, familiar
+ API that allows you to plug in different search backends (such as Solr,
+ Elasticsearch, Whoosh, Xapian, etc.) without having to modify your code.
+ .
+ It plays nicely with third-party apps without needing to modify the source and
+ supports advanced features like faceting, More Like This, highlighting, spatial
+ search and spelling suggestions.
+
+Package: python3-django-haystack
+Architecture: all
+Depends: python3-django,
+ ${misc:Depends},
+ ${python3:Depends}
+Suggests: python3-elasticsearch,
+ python3-httplib2,
+ python3-whoosh
+Description: modular search for Django (Python3 version)
+ Haystack provides modular search for Django. It features a unified, familiar
+ API that allows you to plug in different search backends (such as Solr,
+ Elasticsearch, Whoosh, Xapian, etc.) without having to modify your code.
+ .
+ It plays nicely with third-party apps without needing to modify the source and
+ supports advanced features like faceting, More Like This, highlighting, spatial
+ search and spelling suggestions.
+ .
+ This package contains the Python 3 version of the library.
+
+Package: python-django-haystack-doc
+Section: doc
+Architecture: all
+Depends: ${misc:Depends},
+ ${sphinxdoc:Depends}
+Description: modular search for Django (Documentation)
+ Haystack provides modular search for Django. It features a unified, familiar
+ API that allows you to plug in different search backends (such as Solr,
+ Elasticsearch, Whoosh, Xapian, etc.) without having to modify your code.
+ .
+ It plays nicely with third-party apps without needing to modify the source and
+ supports advanced features like faceting, More Like This, highlighting, spatial
+ search and spelling suggestions.
+ .
+ This package contains the documentation.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..8e706d2
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,55 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: django-haystack
+Upstream-Contact: Daniel Lindsley <daniel@toastdriven.com>
+Source: https://github.com/toastdriven/django-haystack
+
+Files: *
+Copyright: 2009-2013, Daniel Lindsley <daniel@toastdriven.com>
+License: BSD-3-clause
+
+Files: haystack/templates/search_configuration/solr.xml
+Copyright: Apache Software Foundation
+License: Apache
+
+Files: debian/*
+Copyright: 2013, Fladischer Michael <fladi@debian.org>
+License: BSD-3-clause
+
+License: BSD-3-clause
+ Redistribution and use in source and binary forms, with or without modification,
+ are permitted provided that the following conditions are met:
+ .
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ .
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ .
+ 3. Neither the name of Haystack nor the names of its contributors may be used
+ to endorse or promote products derived from this software without
+ specific prior written permission.
+ .
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+License: Apache
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ .
+ http://www.apache.org/licenses/LICENSE-2.0
+ .
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
diff --git a/debian/python-django-haystack-doc.doc-base b/debian/python-django-haystack-doc.doc-base
new file mode 100644
index 0000000..af562f5
--- /dev/null
+++ b/debian/python-django-haystack-doc.doc-base
@@ -0,0 +1,8 @@
+Document: python-django-haystack-doc
+Title: Django Haystack Documentation
+Author: Daniel Lindsley <daniel@toastdriven.com>
+Section: Programming/Python
+
+Format: HTML
+Index: /usr/share/doc/python-django-haystack-doc/html/index.html
+Files: /usr/share/doc/python-django-haystack-doc/html/*.html
diff --git a/debian/python-django-haystack-doc.docs b/debian/python-django-haystack-doc.docs
new file mode 100644
index 0000000..4ecc793
--- /dev/null
+++ b/debian/python-django-haystack-doc.docs
@@ -0,0 +1 @@
+docs/_build/html
diff --git a/debian/python-django-haystack-doc.lintian-overrides b/debian/python-django-haystack-doc.lintian-overrides
new file mode 100644
index 0000000..c5cea0f
--- /dev/null
+++ b/debian/python-django-haystack-doc.lintian-overrides
@@ -0,0 +1,2 @@
+# Upstream does not provide a changelog.
+python-django-haystack-doc: no-upstream-changelog
diff --git a/debian/python-django-haystack.docs b/debian/python-django-haystack.docs
new file mode 100644
index 0000000..a1320b1
--- /dev/null
+++ b/debian/python-django-haystack.docs
@@ -0,0 +1 @@
+README.rst
diff --git a/debian/python-django-haystack.lintian-overrides b/debian/python-django-haystack.lintian-overrides
new file mode 100644
index 0000000..717776b
--- /dev/null
+++ b/debian/python-django-haystack.lintian-overrides
@@ -0,0 +1,2 @@
+# Upstream does not provide a changelog.
+python-django-haystack: no-upstream-changelog
diff --git a/debian/python3-django-haystack.docs b/debian/python3-django-haystack.docs
new file mode 100644
index 0000000..a1320b1
--- /dev/null
+++ b/debian/python3-django-haystack.docs
@@ -0,0 +1 @@
+README.rst
diff --git a/debian/python3-django-haystack.lintian-overrides b/debian/python3-django-haystack.lintian-overrides
new file mode 100644
index 0000000..bf957e8
--- /dev/null
+++ b/debian/python3-django-haystack.lintian-overrides
@@ -0,0 +1,2 @@
+# Upstream does not provide a changelog.
+python3-django-haystack: no-upstream-changelog
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..b1bc88b
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,15 @@
+#!/usr/bin/make -f
+
+export PYBUILD_NAME=django-haystack
+export PYBUILD_DISABLE=test
+
+%:
+ dh $@ --with python2,python3,sphinxdoc --buildsystem=pybuild
+
+override_dh_auto_build:
+ PYTHONPATH=. sphinx-build -b html -d docs/_build/.doctrees -N docs docs/_build/html
+ dh_auto_build
+
+override_dh_clean:
+ rm -rf docs/_build
+ dh_clean
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..163aaf8
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/debian/source/lintian-overrides b/debian/source/lintian-overrides
new file mode 100644
index 0000000..ded6f93
--- /dev/null
+++ b/debian/source/lintian-overrides
@@ -0,0 +1,2 @@
+# Upstream does not provide PGP signatures for their release tarballs.
+django-haystack source: debian-watch-may-check-gpg-signature
diff --git a/debian/watch b/debian/watch
new file mode 100644
index 0000000..44dc910
--- /dev/null
+++ b/debian/watch
@@ -0,0 +1,3 @@
+version=3
+opts=uversionmangle=s/(rc|a|b|c)/~$1/ \
http://pypi.debian.net/django-haystack/django-haystack-(.+)\.(?:zip|tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
\ No newline at end of file
diff --git a/django_haystack.egg-info/PKG-INFO b/django_haystack.egg-info/PKG-INFO
new file mode 100644
index 0000000..6c444ac
--- /dev/null
+++ b/django_haystack.egg-info/PKG-INFO
@@ -0,0 +1,79 @@
+Metadata-Version: 1.1
+Name: django-haystack
+Version: 2.4.0
+Summary: Pluggable search for Django.
+Home-page: http://haystacksearch.org/
+Author: Daniel Lindsley
+Author-email: daniel@toastdriven.com
+License: UNKNOWN
+Description: ========
+ Haystack
+ ========
+
+ :author: Daniel Lindsley
+ :date: 2013/07/28
+
+ Haystack provides modular search for Django. It features a unified, familiar
+ API that allows you to plug in different search backends (such as Solr_,
+ Elasticsearch_, Whoosh_, Xapian_, etc.) without having to modify your code.
+
+ .. _Solr: http://lucene.apache.org/solr/
+ .. _Elasticsearch: http://elasticsearch.org/
+ .. _Whoosh: https://bitbucket.org/mchaput/whoosh/
+ .. _Xapian: http://xapian.org/
+
+ Haystack is BSD licensed, plays nicely with third-party apps without needing to
+ modify the source and supports advanced features like faceting, More Like This,
+ highlighting, spatial search and spelling suggestions.
+
+ You can find more information at http://haystacksearch.org/.
+
+
+ Getting Help
+ ============
+
+ There is a mailing list (http://groups.google.com/group/django-haystack/)
+ available for general discussion and an IRC channel (#haystack on
+ irc.freenode.net).
+
+
+ Documentation
+ =============
+
+ * Development version: http://docs.haystacksearch.org/
+ * v2.3.X: http://django-haystack.readthedocs.org/en/v2.3.0/
+ * v2.2.X: http://django-haystack.readthedocs.org/en/v2.2.0/
+ * v2.1.X: http://django-haystack.readthedocs.org/en/v2.1.0/
+ * v2.0.X: http://django-haystack.readthedocs.org/en/v2.0.0/
+ * v1.2.X: http://django-haystack.readthedocs.org/en/v1.2.7/
+ * v1.1.X: http://django-haystack.readthedocs.org/en/v1.1/
+
+ Build Status
+ ============
+
+ .. image:: https://travis-ci.org/django-haystack/django-haystack.svg?branch=master
+ :target: https://travis-ci.org/django-haystack/django-haystack
+
+ Requirements
+ ============
+
+ Haystack has a relatively easily-met set of requirements.
+
+ * Python 2.7+ or Python 3.3+
+ * Django 1.6+
+
+ Additionally, each backend has its own requirements. You should refer to
+ http://django-haystack.readthedocs.org/en/latest/installing_search_engines.html for more
+ details.
+
+Platform: UNKNOWN
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Environment :: Web Environment
+Classifier: Framework :: Django
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 3
+Classifier: Topic :: Utilities
diff --git a/django_haystack.egg-info/SOURCES.txt b/django_haystack.egg-info/SOURCES.txt
new file mode 100644
index 0000000..53c2d69
--- /dev/null
+++ b/django_haystack.egg-info/SOURCES.txt
@@ -0,0 +1,99 @@
+AUTHORS
+LICENSE
+MANIFEST.in
+README.rst
+setup.cfg
+setup.py
+django_haystack.egg-info/PKG-INFO
+django_haystack.egg-info/SOURCES.txt
+django_haystack.egg-info/dependency_links.txt
+django_haystack.egg-info/not-zip-safe
+django_haystack.egg-info/pbr.json
+django_haystack.egg-info/requires.txt
+django_haystack.egg-info/top_level.txt
+docs/Makefile
+docs/admin.rst
+docs/architecture_overview.rst
+docs/autocomplete.rst
+docs/backend_support.rst
+docs/best_practices.rst
+docs/boost.rst
+docs/conf.py
+docs/contributing.rst
+docs/creating_new_backends.rst
+docs/debugging.rst
+docs/faceting.rst
+docs/faq.rst
+docs/glossary.rst
+docs/highlighting.rst
+docs/index.rst
+docs/inputtypes.rst
+docs/installing_search_engines.rst
+docs/management_commands.rst
+docs/migration_from_1_to_2.rst
+docs/multiple_index.rst
+docs/other_apps.rst
+docs/python3.rst
+docs/rich_content_extraction.rst
+docs/running_tests.rst
+docs/searchbackend_api.rst
+docs/searchfield_api.rst
+docs/searchindex_api.rst
+docs/searchquery_api.rst
+docs/searchqueryset_api.rst
+docs/searchresult_api.rst
+docs/settings.rst
+docs/signal_processors.rst
+docs/spatial.rst
+docs/templatetags.rst
+docs/toc.rst
+docs/tutorial.rst
+docs/utils.rst
+docs/views_and_forms.rst
+docs/who_uses.rst
+docs/_build/.gitignore
+docs/_static/.gitignore
+docs/_templates/.gitignore
+docs/haystack_theme/layout.html
+docs/haystack_theme/theme.conf
+docs/haystack_theme/static/documentation.css
+haystack/__init__.py
+haystack/admin.py
+haystack/constants.py
+haystack/exceptions.py
+haystack/fields.py
+haystack/forms.py
+haystack/generic_views.py
+haystack/indexes.py
+haystack/inputs.py
+haystack/manager.py
+haystack/models.py
+haystack/panels.py
+haystack/query.py
+haystack/routers.py
+haystack/signals.py
+haystack/urls.py
+haystack/views.py
+haystack/backends/__init__.py
+haystack/backends/elasticsearch_backend.py
+haystack/backends/simple_backend.py
+haystack/backends/solr_backend.py
+haystack/backends/whoosh_backend.py
+haystack/management/__init__.py
+haystack/management/commands/__init__.py
+haystack/management/commands/build_solr_schema.py
+haystack/management/commands/clear_index.py
+haystack/management/commands/haystack_info.py
+haystack/management/commands/rebuild_index.py
+haystack/management/commands/update_index.py
+haystack/templates/panels/haystack.html
+haystack/templates/search_configuration/solr.xml
+haystack/templatetags/__init__.py
+haystack/templatetags/highlight.py
+haystack/templatetags/more_like_this.py
+haystack/utils/__init__.py
+haystack/utils/app_loading.py
+haystack/utils/geo.py
+haystack/utils/highlighting.py
+haystack/utils/loading.py
+haystack/utils/log.py
\ No newline at end of file
diff --git a/django_haystack.egg-info/dependency_links.txt b/django_haystack.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/django_haystack.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/django_haystack.egg-info/not-zip-safe b/django_haystack.egg-info/not-zip-safe
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/django_haystack.egg-info/not-zip-safe
@@ -0,0 +1 @@
+
diff --git a/django_haystack.egg-info/pbr.json b/django_haystack.egg-info/pbr.json
new file mode 100644
index 0000000..694ff89
--- /dev/null
+++ b/django_haystack.egg-info/pbr.json
@@ -0,0 +1 @@
+{"is_release": false, "git_version": "ebf1a5c"} \ No newline at end of file
diff --git a/django_haystack.egg-info/requires.txt b/django_haystack.egg-info/requires.txt
new file mode 100644
index 0000000..eec1cf1
--- /dev/null
+++ b/django_haystack.egg-info/requires.txt
@@ -0,0 +1 @@
+Django
\ No newline at end of file
diff --git a/django_haystack.egg-info/top_level.txt b/django_haystack.egg-info/top_level.txt
new file mode 100644
index 0000000..d755762
--- /dev/null
+++ b/django_haystack.egg-info/top_level.txt
@@ -0,0 +1 @@
+haystack
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..791d8d6
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,80 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d _build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html web pickle htmlhelp latex changes linkcheck
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " changes to make an overview over all changed/added/deprecated items"
+ @echo " linkcheck to check all external links for integrity"
+
+clean:
+ -rm -rf _build/*
+
+html:
+ mkdir -p _build/html _build/doctrees
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) _build/html
+ @echo
+ @echo "Build finished. The HTML pages are in _build/html."
+
+pickle:
+ mkdir -p _build/pickle _build/doctrees
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) _build/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+web: pickle
+
+json:
+ mkdir -p _build/json _build/doctrees
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) _build/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ mkdir -p _build/htmlhelp _build/doctrees
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) _build/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in _build/htmlhelp."
+
+latex:
+ mkdir -p _build/latex _build/doctrees
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) _build/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in _build/latex."
+ @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \
+ "run these through (pdf)latex."
+
+changes:
+ mkdir -p _build/changes _build/doctrees
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) _build/changes
+ @echo
+ @echo "The overview file is in _build/changes."
+
+linkcheck:
+ mkdir -p _build/linkcheck _build/doctrees
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) _build/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in _build/linkcheck/output.txt."
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) _build/epub
+ @echo
+ @echo "Build finished. The epub file is in _build/epub."
diff --git a/docs/_build/.gitignore b/docs/_build/.gitignore
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/docs/_build/.gitignore
diff --git a/docs/_static/.gitignore b/docs/_static/.gitignore
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/docs/_static/.gitignore
diff --git a/docs/_templates/.gitignore b/docs/_templates/.gitignore
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/docs/_templates/.gitignore
diff --git a/docs/admin.rst b/docs/admin.rst
new file mode 100644
index 0000000..c7a2d2a
--- /dev/null
+++ b/docs/admin.rst
@@ -0,0 +1,47 @@
+.. _ref-admin:
+
+===================
+Django Admin Search
+===================
+
+Haystack comes with a base class to support searching via Haystack in the
+Django admin. To use Haystack to search, inherit from ``haystack.admin.SearchModelAdmin``
+instead of ``django.contrib.admin.ModelAdmin``.
+
+For example::
+
+ from django.contrib import admin
+ from haystack.admin import SearchModelAdmin
+ from .models import MockModel
+
+
+ class MockModelAdmin(SearchModelAdmin):
+ haystack_connection = 'solr'
+ date_hierarchy = 'pub_date'
+ list_display = ('author', 'pub_date')
+
+
+ admin.site.register(MockModel, MockModelAdmin)
+
+You can also specify the Haystack connection used by the search with the
+``haystack_connection`` property on the model admin class. If not specified,
+the default connection will be used.
+
+If you already have a base model admin class you use, there is also a mixin
+you can use instead::
+
+ from django.contrib import admin
+ from haystack.admin import SearchModelAdminMixin
+ from .models import MockModel
+
+
+ class MyCustomModelAdmin(admin.ModelAdmin):
+ pass
+
+
+ class MockModelAdmin(SearchModelAdminMixin, MyCustomModelAdmin):
+ haystack_connection = 'solr'
+ date_hierarchy = 'pub_date'
+ list_display = ('author', 'pub_date')
+
+
+ admin.site.register(MockModel, MockModelAdmin)
diff --git a/docs/architecture_overview.rst b/docs/architecture_overview.rst
new file mode 100644
index 0000000..f56a58b
--- /dev/null
+++ b/docs/architecture_overview.rst
@@ -0,0 +1,66 @@
+.. _ref-architecture-overview:
+
+=====================
+Architecture Overview
+=====================
+
+``SearchQuerySet``
+------------------
+
+One main implementation.
+
+* Standard API that loosely follows ``QuerySet`` (see the sketch below)
+* Handles most queries
+* Allows for custom "parsing"/building through API
+* Dispatches to ``SearchQuery`` for actual query
+* Handles automatically creating a query
+* Allows for raw queries to be passed straight to backend.
+
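+For orientation, a chained query reads much like ORM usage (a sketch only;
+the ``content`` and ``pub_date`` field names are illustrative and depend on
+your ``SearchIndex`` definitions)::
+
+    from haystack.query import SearchQuerySet
+
+    # Filter, order and slice, much like a Django QuerySet.
+    results = SearchQuerySet().filter(content='hello').order_by('-pub_date')[:10]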
+
+``SearchQuery``
+---------------
+
+Implemented per-backend.
+
+* Method for building the query out of the structured data.
+* Method for cleaning a string of reserved characters used by the backend.
+
+Main class provides:
+
+* Methods to add filters/models/order-by/boost/limits to the search.
+* Method to perform a raw search.
+* Method to get the number of hits.
+* Method to return the results provided by the backend (likely not a full list).
+
+
+``SearchBackend``
+-----------------
+
+Implemented per-backend.
+
+* Connects to search engine
+* Method for saving new docs to index
+* Method for removing docs from index
+* Method for performing the actual query
+
+
+``SearchSite``
+--------------
+
+One main implementation.
+
+* Standard API that loosely follows ``django.contrib.admin.sites.AdminSite``
+* Handles registering/unregistering models to search on a per-site basis.
+* Provides a means of adding custom indexes to a model, like ``ModelAdmins``.
+
+
+``SearchIndex``
+---------------
+
+Implemented per-model you wish to index.
+
+* Handles generating the document to be indexed.
+* Populates additional fields to accompany the document.
+* Provides a way to limit what types of objects get indexed.
+* Provides a way to index the document(s).
+* Provides a way to remove the document(s).
diff --git a/docs/autocomplete.rst b/docs/autocomplete.rst
new file mode 100644
index 0000000..0ff7005
--- /dev/null
+++ b/docs/autocomplete.rst
@@ -0,0 +1,220 @@
+.. _ref-autocomplete:
+
+============
+Autocomplete
+============
+
+Autocomplete is becoming increasingly common as an add-on to search. Haystack
+makes it relatively simple to implement. There are two steps in the process,
+one to prepare the data and one to implement the actual search.
+
+Step 1. Setup The Data
+======================
+
+To do autocomplete effectively, the search backend uses n-grams (essentially
+a small window passed over the string). Because this alters the way your
+data needs to be stored, the best approach is to add a new field to your
+``SearchIndex`` that contains the text you want to autocomplete on.
+
+You have two choices: ``NgramField`` and ``EdgeNgramField``. Though very similar,
+the choice of field is somewhat important.
+
+* If you're working with standard text, ``EdgeNgramField`` tokenizes on
+ whitespace. This prevents incorrect matches when parts of two different words
+ are mashed together as one n-gram. **This is what most users should use.**
+* If you're working with Asian languages or want to be able to autocomplete
+ across word boundaries, ``NgramField`` should be what you use.
+
+Example (continuing from the tutorial)::
+
+ import datetime
+ from haystack import indexes
+ from myapp.models import Note
+
+
+ class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ author = indexes.CharField(model_attr='user')
+ pub_date = indexes.DateTimeField(model_attr='pub_date')
+ # We add this for autocomplete.
+ content_auto = indexes.EdgeNgramField(model_attr='content')
+
+ def get_model(self):
+ return Note
+
+ def index_queryset(self, using=None):
+ """Used when the entire index for model is updated."""
+ return Note.objects.filter(pub_date__lte=datetime.datetime.now())
+
+As with all schema changes, you'll need to rebuild/update your index after
+making this change.
+
+
+Step 2. Performing The Query
+============================
+
+Haystack ships with a convenience method to perform most autocomplete searches.
+You simply provide a field and the query you wish to search on to the
+``SearchQuerySet.autocomplete`` method. Given the previous example, an example
+search would look like::
+
+ from haystack.query import SearchQuerySet
+
+ SearchQuerySet().autocomplete(content_auto='old')
+ # Results will match things like 'goldfish', 'cuckold' and 'older'.
+
+The results from the ``SearchQuerySet.autocomplete`` method are full search
+results, just like any regular filter.
+
+If you need more control over your results, you can use standard
+``SearchQuerySet.filter`` calls. For instance::
+
+ from haystack.query import SearchQuerySet
+
+ sqs = SearchQuerySet().filter(content_auto=request.GET.get('q', ''))
+
+This can also be extended to use ``SQ`` for more complex queries (and is what's
+being done under the hood in the ``SearchQuerySet.autocomplete`` method).
+
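+For instance, to match a partial query against more than one n-gram field at
+once (a sketch only; ``title_auto`` is a hypothetical second
+``EdgeNgramField``)::
+
+    from haystack.query import SQ, SearchQuerySet
+
+    query = request.GET.get('q', '')
+    # OR the two autocomplete fields together.
+    sqs = SearchQuerySet().filter(SQ(content_auto=query) | SQ(title_auto=query))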
+
+Example Implementation
+======================
+
+The above is the low-level backend portion of how you implement autocomplete.
+To make it work in the browser, you need both a view to run the autocomplete
+search and some JavaScript to fetch the results.
+
+Since it comes up often, here is an example implementation of those things.
+
+.. warning::
+
+ This code comes with no warranty. Don't ask for support on it. If you
+ copy-paste it and it burns down your server room, I'm not liable for any
+ of it.
+
+ It worked this one time on my machine in a simulated environment.
+
+ And yeah, semicolon-less + 2 space + comma-first. Deal with it.
+
+A stripped-down view might look like::
+
+ # views.py
+ import simplejson as json
+ from django.http import HttpResponse
+ from haystack.query import SearchQuerySet
+
+
+ def autocomplete(request):
+ sqs = SearchQuerySet().autocomplete(content_auto=request.GET.get('q', ''))[:5]
+ suggestions = [result.title for result in sqs]
+ # Make sure you return a JSON object, not a bare list.
+ # Otherwise, you could be vulnerable to an XSS attack.
+ the_data = json.dumps({
+ 'results': suggestions
+ })
+ return HttpResponse(the_data, content_type='application/json')
+
+The template might look like::
+
+ <!DOCTYPE html>
+ <html>
+ <head>
+ <meta charset="utf-8">
+ <title>Autocomplete Example</title>
+ </head>
+ <body>
+ <h1>Autocomplete Example</h1>
+
+ <form method="post" action="/search/" class="autocomplete-me">
+ <input type="text" id="id_q" name="q">
+ <input type="submit" value="Search!">
+ </form>
+
+ <script src="http://ajax.googleapis.com/ajax/libs/jquery/1.8.3/jquery.min.js"></script>
+ <script type="text/javascript">
+ // In a perfect world, this would be its own library file that got included
+ // on the page and only the ``$(document).ready(...)`` below would be present.
+ // But this is an example.
+ var Autocomplete = function(options) {
+ this.form_selector = options.form_selector
+ this.url = options.url || '/search/autocomplete/'
+ this.delay = parseInt(options.delay || 300)
+ this.minimum_length = parseInt(options.minimum_length || 3)
+ this.form_elem = null
+ this.query_box = null
+ }
+
+ Autocomplete.prototype.setup = function() {
+ var self = this
+
+ this.form_elem = $(this.form_selector)
+ this.query_box = this.form_elem.find('input[name=q]')
+
+ // Watch the input box.
+ this.query_box.on('keyup', function() {
+ var query = self.query_box.val()
+
+ if(query.length < self.minimum_length) {
+ return false
+ }
+
+ self.fetch(query)
+ })
+
+ // On selecting a result, populate the search field.
+ this.form_elem.on('click', '.ac-result', function(ev) {
+ self.query_box.val($(this).text())
+ $('.ac-results').remove()
+ return false
+ })
+ }
+
+ Autocomplete.prototype.fetch = function(query) {
+ var self = this
+
+ $.ajax({
+ url: this.url
+ , data: {
+ 'q': query
+ }
+ , success: function(data) {
+ self.show_results(data)
+ }
+ })
+ }
+
+ Autocomplete.prototype.show_results = function(data) {
+ // Remove any existing results.
+ $('.ac-results').remove()
+
+ var results = data.results || []
+ var results_wrapper = $('<div class="ac-results"></div>')
+ var base_elem = $('<div class="result-wrapper"><a href="#" class="ac-result"></a></div>')
+
+ if(results.length > 0) {
+ for(var res_offset in results) {
+ var elem = base_elem.clone()
+ // Don't use .html(...) here, as you open yourself to XSS.
+ // Really, you should use some form of templating.
+ elem.find('.ac-result').text(results[res_offset])
+ results_wrapper.append(elem)
+ }
+ }
+ else {
+ var elem = base_elem.clone()
+ elem.text("No results found.")
+ results_wrapper.append(elem)
+ }
+
+ this.query_box.after(results_wrapper)
+ }
+
+ $(document).ready(function() {
+ window.autocomplete = new Autocomplete({
+ form_selector: '.autocomplete-me'
+ })
+ window.autocomplete.setup()
+ })
+ </script>
+ </body>
+ </html>
diff --git a/docs/backend_support.rst b/docs/backend_support.rst
new file mode 100644
index 0000000..4ab3bc6
--- /dev/null
+++ b/docs/backend_support.rst
@@ -0,0 +1,127 @@
+.. _ref-backend-support:
+
+===============
+Backend Support
+===============
+
+
+Supported Backends
+==================
+
+* Solr_
+* Elasticsearch_
+* Whoosh_
+* Xapian_
+
+.. _Solr: http://lucene.apache.org/solr/
+.. _Elasticsearch: http://elasticsearch.org/
+.. _Whoosh: https://bitbucket.org/mchaput/whoosh/
+.. _Xapian: http://xapian.org/
+
+
+Backend Capabilities
+====================
+
+Solr
+----
+
+**Complete & included with Haystack.**
+
+* Full SearchQuerySet support
+* Automatic query building
+* "More Like This" functionality
+* Term Boosting
+* Faceting
+* Stored (non-indexed) fields
+* Highlighting
+* Spatial search
+* Requires: pysolr (2.0.13+) & Solr 3.5+
+
+Elasticsearch
+-------------
+
+**Complete & included with Haystack.**
+
+* Full SearchQuerySet support
+* Automatic query building
+* "More Like This" functionality
+* Term Boosting
+* Faceting (up to 100 facets)
+* Stored (non-indexed) fields
+* Highlighting
+* Spatial search
+* Requires: elasticsearch-py > 1.0 & Elasticsearch 1.0+
+
+Whoosh
+------
+
+**Complete & included with Haystack.**
+
+* Full SearchQuerySet support
+* Automatic query building
+* "More Like This" functionality
+* Term Boosting
+* Stored (non-indexed) fields
+* Highlighting
+* Requires: whoosh (2.0.0+)
+
+Xapian
+------
+
+**Complete & available as a third-party download.**
+
+* Full SearchQuerySet support
+* Automatic query building
+* "More Like This" functionality
+* Term Boosting
+* Faceting
+* Stored (non-indexed) fields
+* Highlighting
+* Requires: Xapian 1.0.5+ & python-xapian 1.0.5+
+* Backend can be downloaded here: `xapian-haystack <http://github.com/notanumber/xapian-haystack/>`_
+
+Backend Support Matrix
+======================
+
++----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
+| Backend | SearchQuerySet Support | Auto Query Building | More Like This | Term Boost | Faceting | Stored Fields | Highlighting | Spatial |
++================+========================+=====================+================+============+==========+===============+==============+=========+
+| Solr | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
++----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
+| Elasticsearch | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes |
++----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
+| Whoosh | Yes | Yes | Yes | Yes | No | Yes | Yes | No |
++----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
+| Xapian | Yes | Yes | Yes | Yes | Yes | Yes | Yes (plugin) | No |
++----------------+------------------------+---------------------+----------------+------------+----------+---------------+--------------+---------+
+
+
+Wishlist
+========
+
+The following are search backends that would be nice to have in Haystack but are
+licensed in a way that prevents them from being officially bundled. If the
+community expresses interest in any of these, there may be future development.
+
+* Riak_
+* Lupyne_
+* Sphinx_
+
+.. _Riak: http://www.basho.com/
+.. _Lupyne: http://code.google.com/p/lupyne/
+.. _Sphinx: http://www.sphinxsearch.com/
+
+
+Sphinx
+------
+
+This backend is unlikely to be built. Sphinx is pretty gimpy & doesn't do
+blended search results across all models the way the other engines can.
+Very limited featureset as well.
+
+* Full SearchQuerySet support
+* Automatic query building
+* Term Boosting
+* Stored (non-indexed) fields
+* Highlighting
+* Requires: sphinxapi.py (Comes with Sphinx)
diff --git a/docs/best_practices.rst b/docs/best_practices.rst
new file mode 100644
index 0000000..bf70eaf
--- /dev/null
+++ b/docs/best_practices.rst
@@ -0,0 +1,263 @@
+.. _ref-best-practices:
+
+==============
+Best Practices
+==============
+
+What follows are some general recommendations on how to improve your search.
+Some tips represent performance benefits, some provide a better search index.
+You should evaluate these options for yourself and pick the ones that will
+work best for you. Not all situations are created equal and many of these
+options could be considered mandatory in some cases and unnecessary premature
+optimizations in others. Your mileage may vary.
+
+
+Good Search Needs Good Content
+==============================
+
+Most search engines work best when they're given corpuses with predominantly
+text (as opposed to other data like dates, numbers, etc.) in decent quantities
+(more than a couple words). This is in stark contrast to the databases most
+people are used to, which rely heavily on non-text data to create relationships
+and for ease of querying.
+
+To this end, if search is important to you, you should take the time to
+carefully craft your ``SearchIndex`` subclasses to give the search engine the
+best information you can. This isn't necessarily hard but is worth the
+investment of time and thought. Assuming you've only ever used the
+``BasicSearchIndex``, there are some easy improvements you can make when
+creating custom ``SearchIndex`` classes that will make your search better:
+
+* For your ``document=True`` field, use a well-constructed template.
+* Add fields for data you might want to be able to filter by.
+* If the model has related data, you can squash good content from those
+ related models into the parent model's ``SearchIndex``.
+* Similarly, if you have heavily de-normalized models, it may be best
+ represented by a single indexed model rather than many indexed models.
+
+Well-Constructed Templates
+--------------------------
+
+A relatively unique concept in Haystack is the use of templates associated with
+``SearchIndex`` fields. These are data templates: they will never be seen by users
+and ideally contain no HTML. They are used to collect various data from the
+model and structure it as a document for the search engine to analyze and index.
+
+.. note::
+
+ If you read nothing else, this is the single most important thing you can
+ do to make search on your site better for your users. Good templates can
+ make or break your search and providing the search engine with good content
+ to index is critical.
+
+Good templates structure the data well and incorporate as much pertinent text
+as possible. This may include additional fields such as titles, author
+information, metadata, tags/categories. Without being artificial, you want to
+construct as much context as you can. This doesn't mean you should necessarily
+include every field, but you should include fields that provide good content
+or include terms you think your users may frequently search on.
+
+Unless your numbers or dates are highly distinctive, neither of these types of
+data is a good fit within templates. They are usually better suited to other
+fields for filtering within a ``SearchQuerySet``.
+
+Additional Fields For Filtering
+-------------------------------
+
+Documents by themselves are good for generating indexes of content but are
+generally poor for filtering content, for instance, by date. All search engines
+supported by Haystack provide a means to associate extra data as
+attributes/fields on a record. The database analogy would be adding extra
+columns to the table for filtering.
+
+Good candidates here are date fields, number fields, de-normalized data from
+related objects, etc. You can expose these things to users in the form of a
+calendar range to specify, an author to look up or only data from a certain
+series of numbers to return.
+
+You will need to plan ahead and anticipate what you might need to filter on,
+though with each field you add, you increase storage space usage. It's generally
+**NOT** recommended to include every field from a model, just ones you are
+likely to use.
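+
+As a rough illustration (reusing the illustrative field names from the sketch
+earlier), filtering then happens against those extra fields rather than the
+document itself::
+
+    from datetime import date
+
+    from haystack.query import SearchQuerySet
+
+    # Only results by this author, published this year.
+    sqs = SearchQuerySet().filter(author='daniel').filter(pub_date__gte=date(2015, 1, 1))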
+
+Related Data
+------------
+
+Related data is somewhat problematic to deal with, as most search engines are
+better with documents than they are with relationships. One way to approach this
+is to de-normalize a related child object or objects into the parent's document
+template. The inclusion of a foreign key's relevant data or a simple Django
+``{% for %}`` templatetag to iterate over the related objects can increase the
+salient data in your document. Be careful what you include and how you structure
+it, as this can have consequences on how well a result might rank in your
+search.
+
+
+Avoid Hitting The Database
+==========================
+
+A very easy but effective thing you can do to drastically reduce hits on the
+database is to pre-render your search results using stored fields and then
+disable the ``load_all`` aspect of your ``SearchView``.
+
+.. warning::
+
+ This technique may cause a substantial increase in the size of your index
+ as you are basically using it as a storage mechanism.
+
+To do this, you set up one or more stored fields (``indexed=False``) on your
+``SearchIndex`` classes. You should specify a template for the field, filling it
+with the data you'd want to display on your search results pages. When the model
+attached to the ``SearchIndex`` is placed in the index, this template will get
+rendered and stored in the index alongside the record.
+
+.. note::
+
+ The downside of this method is that the HTML for the result will be locked
+ in once it is indexed. To make changes to the structure, you'd have to
+ reindex all of your content. It also limits you to a single display of the
+ content (though you could use multiple fields if that suits your needs).
+
+The second aspect is customizing your ``SearchView`` and its templates. First,
+pass ``load_all=False`` to your ``SearchView``, ideally in your URLconf.
+This prevents the ``SearchQuerySet`` from loading all model objects for results
+ahead of time. Then, in your template, simply display the stored content from
+your ``SearchIndex`` as the HTML result.
+
+.. warning::
+
+    To do this, you must absolutely avoid using ``{{ result.object }}`` or any
+    further accesses beyond that. That call will hit the database, not only
+    nullifying your work on lessening database hits, but actually making it
+    worse, as there will now be at least one query for each result, up from a
+    single query for each type of model with ``load_all=True``.
+
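+Putting the two pieces together, a minimal sketch (the ``rendered`` field name
+and the URL pattern are illustrative, not required names) might look like::
+
+    # search_indexes.py
+    from haystack import indexes
+    from notes.models import Note
+
+
+    class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+        text = indexes.CharField(document=True, use_template=True)
+        # Stored but not indexed; rendered from its own template at index time.
+        rendered = indexes.CharField(use_template=True, indexed=False)
+
+        def get_model(self):
+            return Note
+
+    # urls.py
+    from django.conf.urls import patterns, url
+    from haystack.views import SearchView
+
+    urlpatterns = patterns('',
+        url(r'^search/$', SearchView(load_all=False), name='haystack_search'),
+    )
+
+In your results template you would then output ``{{ result.rendered|safe }}``
+rather than touching ``result.object``.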
+
+Content-Type Specific Templates
+===============================
+
+Frequently, when displaying results, you'll want to customize the HTML output
+based on what model the result represents.
+
+In practice, the best way to handle this is through the use of ``include``
+along with the data on the ``SearchResult``.
+
+Your existing loop might look something like::
+
+ {% for result in page.object_list %}
+ <p>
+ <a href="{{ result.object.get_absolute_url }}">{{ result.object.title }}</a>
+ </p>
+ {% empty %}
+ <p>No results found.</p>
+ {% endfor %}
+
+An improved version might look like::
+
+ {% for result in page.object_list %}
+ {% if result.content_type == "blog.post" %}
+ {% include "search/includes/blog/post.html" %}
+ {% endif %}
+ {% if result.content_type == "media.photo" %}
+ {% include "search/includes/media/photo.html" %}
+ {% endif %}
+ {% empty %}
+ <p>No results found.</p>
+ {% endfor %}
+
+Those include files might look like::
+
+ # search/includes/blog/post.html
+ <div class="post_result">
+ <h3><a href="{{ result.object.get_absolute_url }}">{{ result.object.title }}</a></h3>
+
+ <p>{{ result.object.tease }}</p>
+ </div>
+
+ # search/includes/media/photo.html
+ <div class="photo_result">
+ <a href="{{ result.object.get_absolute_url }}">
+ <img src="http://your.media.example.com/media/{{ result.object.photo.url }}"></a>
+ <p>Taken By {{ result.object.taken_by }}</p>
+ </div>
+
+You can make this even better by standardizing on an includes layout, then
+writing a template tag or filter that generates the include filename. Usage
+might look something like::
+
+ {% for result in page.object_list %}
+ {% with result|search_include as fragment %}
+ {% include fragment %}
+ {% endwith %}
+ {% empty %}
+ <p>No results found.</p>
+ {% endfor %}
+
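+A minimal sketch of such a filter (the template tag module and the include
+layout are assumptions, not part of Haystack) might be::
+
+    # myapp/templatetags/search_tags.py
+    from django import template
+
+    register = template.Library()
+
+
+    @register.filter
+    def search_include(result):
+        # ``result.content_type()`` returns something like "blog.post";
+        # map that onto the standardized include path.
+        app_label, model_name = result.content_type().split('.')
+        return 'search/includes/%s/%s.html' % (app_label, model_name)
+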
+
+Real-Time Search
+================
+
+If your site sees heavy search traffic and up-to-date information is very
+important, Haystack provides a way to constantly keep your index up to date.
+
+You can enable the ``RealtimeSignalProcessor`` within your settings, which
+will allow Haystack to automatically update the index whenever a model is
+saved/deleted.
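+
+For example, in your ``settings.py``::
+
+    HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'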
+
+You can find more information within the :doc:`signal_processors` documentation.
+
+
+Use Of A Queue For A Better User Experience
+===========================================
+
+By default, whenever you reindex content (or the ``RealtimeSignalProcessor``
+fires), Haystack immediately tries to merge it into the search index. If you
+have a write-heavy site, this could mean your search engine may spend most of
+its time churning on constant merges. If you can afford a small delay between
+when a model is saved and when it appears in the search results, queuing these
+merges is a good idea.
+
+You gain a snappier interface for users as updates go into a queue (a fast
+operation) and then typical processing continues. You also get a lower churn
+rate, as most search engines deal with batches of updates better than many
+single updates. You can also use this to distribute load, as the queue consumer
+could live on a completely separate server from your webservers, allowing you
+to tune more efficiently.
+
+Implementing this is relatively simple. There are two parts, creating a new
+``QueuedSignalProcessor`` class and creating a queue processing script to
+handle the actual updates.
+
+For the ``QueuedSignalProcessor``, you should inherit from
+``haystack.signals.BaseSignalProcessor``, then alter the ``setup/teardown``
+methods to call an enqueuing method instead of directly calling
+``handle_save/handle_delete``. For example::
+
+    from django.db import models
+
+    from haystack import signals
+
+
+    class QueuedSignalProcessor(signals.BaseSignalProcessor):
+        # Override the built-in.
+        def setup(self):
+            models.signals.post_save.connect(self.enqueue_save)
+            models.signals.post_delete.connect(self.enqueue_delete)
+
+        # Override the built-in.
+        def teardown(self):
+            models.signals.post_save.disconnect(self.enqueue_save)
+            models.signals.post_delete.disconnect(self.enqueue_delete)
+
+        # Add on a queuing method.
+        def enqueue_save(self, sender, instance, **kwargs):
+            # Push the save & information onto the queue du jour here...
+            pass
+
+        # Add on a queuing method.
+        def enqueue_delete(self, sender, instance, **kwargs):
+            # Push the delete & information onto the queue du jour here...
+            pass
+
+For the consumer, this is much more specific to the queue used and your desired
+setup. At a minimum, you will need to periodically consume the queue, fetch the
+correct index from the ``SearchSite`` for your application, load the model from
+the message and pass that model to the ``update_object`` or ``remove_object``
+methods on the ``SearchIndex``. Proper grouping, batching and intelligent
+handling are all additional things that could be applied on top to further
+improve performance.
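+
+As a rough, illustrative sketch only (the message layout is an assumption, and
+the example uses the Haystack 2.x connections API rather than the older
+``SearchSite``), a consumer might do something like::
+
+    from django.apps import apps  # Django 1.7+; older projects can use django.db.models.get_model
+
+    from haystack import connections
+
+
+    def process_message(message):
+        # ``message`` is assumed to look like:
+        # {'action': 'save', 'app_label': 'notes', 'model': 'note', 'pk': 1}
+        model = apps.get_model(message['app_label'], message['model'])
+        index = connections['default'].get_unified_index().get_index(model)
+
+        if message['action'] == 'save':
+            index.update_object(model.objects.get(pk=message['pk']))
+        else:
+            index.remove_object(model(pk=message['pk']))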
diff --git a/docs/boost.rst b/docs/boost.rst
new file mode 100644
index 0000000..4a56931
--- /dev/null
+++ b/docs/boost.rst
@@ -0,0 +1,123 @@
+.. _ref-boost:
+
+=====
+Boost
+=====
+
+
+Scoring is a critical component of good search. Normal full-text searches
+automatically score a document based on how well it matches the query provided.
+However, sometimes you want certain documents to score better than they
+otherwise would. Boosting is a way to achieve this. There are three types of
+boost:
+
+* Term Boost
+* Document Boost
+* Field Boost
+
+.. note::
+
+ Document & Field boost support was added in Haystack 1.1.
+
+Despite all being types of boost, they take place at different times and have
+slightly different effects on scoring.
+
+Term boost happens at query time (when the search query is run) and is based
+around increasing the score if a certain word/phrase is seen.
+
+On the other hand, document & field boosts take place at indexing time (when
+the document is being added to the index). Document boost causes the relevance
+of the entire result to go up, where field boost causes only searches within
+that field to do better.
+
+.. warning::
+
+ Be warned that boost is very, very sensitive & can hurt overall search
+ quality if over-zealously applied. Even very small adjustments can affect
+ relevance in a big way.
+
+Term Boost
+==========
+
+Term boosting is achieved by using ``SearchQuerySet.boost``. You provide it
+the term you want to boost on & a floating point value (where ``1.0`` means
+100%, i.e. no boost).
+
+Example::
+
+ # Slight increase in relevance for documents that include "banana".
+ sqs = SearchQuerySet().boost('banana', 1.1)
+
+ # Big decrease in relevance for documents that include "blueberry".
+ sqs = SearchQuerySet().boost('blueberry', 0.8)
+
+See the :doc:`searchqueryset_api` docs for more details on using this method.
+
+
+Document Boost
+==============
+
+Document boosting is done by adding a ``boost`` field to the prepared data
+``SearchIndex`` creates. The best way to do this is to override
+``SearchIndex.prepare``::
+
+    from haystack import indexes
+    from notes.models import Note
+
+
+    class NoteSearchIndex(indexes.SearchIndex, indexes.Indexable):
+        # Your regular fields here then...
+
+        def prepare(self, obj):
+            data = super(NoteSearchIndex, self).prepare(obj)
+            data['boost'] = 1.1
+            return data
+
+
+Another approach might be to add a new field called ``boost``. However, this
+can skew your schema and is not encouraged.
+
+
+Field Boost
+===========
+
+Field boosting is enabled by setting the ``boost`` kwarg on the desired field.
+An example of this might be increasing the significance of a ``title``::
+
+    from haystack import indexes
+    from notes.models import Note
+
+
+    class NoteSearchIndex(indexes.SearchIndex, indexes.Indexable):
+        text = indexes.CharField(document=True, use_template=True)
+        title = indexes.CharField(model_attr='title', boost=1.125)
+
+        def get_model(self):
+            return Note
+
+.. note::
+
+    Field boosting only has an effect when the ``SearchQuerySet`` filters on
+    the field which has been boosted. If you are using a default search view
+    or form, you will need to override the search method or otherwise include
+    the field in your search query. This example ``CustomSearchForm`` searches
+    the automatic ``content`` field and the boosted ``title`` field::
+
+        from haystack.forms import SearchForm
+        from haystack.inputs import AutoQuery
+        from haystack.query import SQ
+
+
+        class CustomSearchForm(SearchForm):
+
+            def search(self):
+                if not self.is_valid():
+                    return self.no_query_found()
+
+                if not self.cleaned_data.get('q'):
+                    return self.no_query_found()
+
+                q = self.cleaned_data['q']
+                sqs = self.searchqueryset.filter(SQ(content=AutoQuery(q)) | SQ(title=AutoQuery(q)))
+
+                if self.load_all:
+                    sqs = sqs.load_all()
+
+                return sqs.highlight()
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..db3e990
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,207 @@
+# -*- coding: utf-8 -*-
+#
+# Haystack documentation build configuration file, created by
+# sphinx-quickstart on Wed Apr 15 08:50:46 2009.
+#
+# This file is execfile()d with the current directory set to its containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import os
+import sys
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#sys.path.append(os.path.abspath('.'))
+
+# -- General configuration -----------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be extensions
+# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
+extensions = []
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8'
+
+# The master toctree document.
+master_doc = 'toc'
+
+# General information about the project.
+project = u'Haystack'
+copyright = u'2009-2013, Daniel Lindsley'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = '2.1.1'
+# The full version, including alpha/beta/rc tags.
+release = '2.1.1-dev'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of documents that shouldn't be included in the build.
+#unused_docs = []
+
+# List of directories, relative to source directory, that shouldn't be searched
+# for source files.
+exclude_trees = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+
+# -- Options for HTML output ---------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages. Major themes that come with
+# Sphinx are currently 'default' and 'sphinxdoc'.
+# html_theme = 'haystack_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further. For a list of options available for each theme, see the
+# documentation.
+# html_theme_options = {
+# "rightsidebar": "true",
+# "bodyfont": "'Helvetica Neue', Arial, sans-serif",
+# "sidebarbgcolor": "#303c0c",
+# "sidebartextcolor": "#effbcb",
+# "sidebarlinkcolor": "#eef7ab",
+# "relbarbgcolor": "#caecff",
+# "relbartextcolor": "#262511",
+# "relbarlinkcolor": "#262511",
+# "footerbgcolor": "#262511",
+# }
+
+# Add any paths that contain custom themes here, relative to this directory.
+html_theme_path = ['.']
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_use_modindex = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = ''
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Haystackdoc'
+
+
+# -- Options for LaTeX output --------------------------------------------------
+
+# The paper size ('letter' or 'a4').
+#latex_paper_size = 'letter'
+
+# The font size ('10pt', '11pt' or '12pt').
+#latex_font_size = '10pt'
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title, author, documentclass [howto/manual]).
+latex_documents = [
+ ('index', 'Haystack.tex', u'Haystack Documentation',
+ u'Daniel Lindsley', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# Additional stuff for the LaTeX preamble.
+#latex_preamble = ''
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_use_modindex = True
diff --git a/docs/contributing.rst b/docs/contributing.rst
new file mode 100644
index 0000000..7806c19
--- /dev/null
+++ b/docs/contributing.rst
@@ -0,0 +1,132 @@
+============
+Contributing
+============
+
+Haystack is open-source and, as such, grows (or shrinks) & improves in part
+due to the community. Below are some guidelines on how to help with the project.
+
+
+Philosophy
+==========
+
+* Haystack is BSD-licensed. All contributed code must be either
+
+ * the original work of the author, contributed under the BSD, or...
+ * work taken from another project released under a BSD-compatible license.
+
+* GPL'd (or similar) works are not eligible for inclusion.
+* Haystack's git master branch should always be stable, production-ready &
+ passing all tests.
+* Major releases (1.x.x) are commitments to backward-compatibility of the public APIs.
+  Any documented API should ideally not change between major releases.
+  The exception to this rule is in the event of either a security issue
+  or to accommodate changes in Django itself.
+* Minor releases (x.3.x) are for the addition of substantial features or major
+ bugfixes.
+* Patch releases (x.x.4) are for minor features or bugfixes.
+
+
+Guidelines For Reporting An Issue/Feature
+=========================================
+
+So you've found a bug or have a great idea for a feature. Here are the steps you
+should take to help get it added/fixed in Haystack:
+
+* First, check to see if there's an existing issue/pull request for the
+ bug/feature. All issues are at https://github.com/toastdriven/django-haystack/issues
+ and pull reqs are at https://github.com/toastdriven/django-haystack/pulls.
+* If there isn't one there, please file an issue. The ideal report includes:
+
+ * A description of the problem/suggestion.
+ * How to recreate the bug.
+ * If relevant, include the versions of your:
+
+ * Python interpreter
+ * Django
+ * Haystack
+ * Search engine used (as well as bindings)
+ * Optionally, the versions of any other dependencies involved
+
+ * Ideally, create a pull request with a (failing) test case demonstrating
+ what's wrong. This makes it easy for us to reproduce & fix the problem.
+ Instructions for running the tests are at :doc:`index`
+
+You might also hop into the IRC channel (``#haystack`` on ``irc.freenode.net``)
+& raise your question there, as there may be someone who can help you with a
+work-around.
+
+
+Guidelines For Contributing Code
+================================
+
+If you're ready to take the plunge & contribute back some code/docs, the
+process should look like:
+
+* Fork the project on GitHub into your own account.
+* Clone your copy of Haystack.
+* Make a new branch in git & commit your changes there.
+* Push your new branch up to GitHub.
+* Again, ensure there isn't already an issue or pull request out there on it.
+ If there is & you feel you have a better fix, please take note of the issue
+ number & mention it in your pull request.
+* Create a new pull request (based on your branch), including what the
+ problem/feature is, versions of your software & referencing any related
+ issues/pull requests.
+
+In order to be merged into Haystack, contributions must have the following:
+
+* A solid patch that:
+
+ * is clear.
+ * works across all supported versions of Python/Django.
+ * follows the existing style of the code base (mostly PEP-8).
+ * comments included as needed.
+
+* A test case that demonstrates the previous flaw that now passes
+ with the included patch.
+* If it adds/changes a public API, it must also include documentation
+ for those changes.
+* Must be appropriately licensed (see "Philosophy").
+* Adds yourself to the AUTHORS file.
+
+If your contribution lacks any of these things, they will have to be added
+by a core contributor before being merged into Haystack proper, which may take
+substantial time for the all-volunteer team to get to.
+
+
+Guidelines For Core Contributors
+================================
+
+If you've been granted the commit bit, here's how to shepherd the changes in:
+
+* Any time you go to work on Haystack, please use ``git pull --rebase`` to fetch
+ the latest changes.
+* Any new features/bug fixes must meet the above guidelines for contributing
+ code (solid patch/tests passing/docs included).
+* Commits are typically cherry-picked onto a branch off master.
+
+ * This is done so as not to include extraneous commits, as some people submit
+ pull reqs based on their git master that has other things applied to it.
+
+* A set of commits should be squashed down to a single commit.
+
+ * ``git merge --squash`` is a good tool for performing this, as is
+ ``git rebase -i HEAD~N``.
+ * This is done to prevent anyone using the git repo from accidentally pulling
+ work-in-progress commits.
+
+* Commit messages should use past tense, describe what changed & thank anyone
+ involved. Examples::
+
+ """Added support for the latest version of Whoosh (v2.3.2)."""
+ """Fixed a bug in ``solr_backend.py``. Thanks to joeschmoe for the report!"""
+ """BACKWARD-INCOMPATIBLE: Altered the arguments passed to ``SearchBackend``.
+
+ Further description appears here if the change warrants an explanation
+ as to why it was done."""
+
+* For any patches applied from a contributor, please ensure their name appears
+ in the AUTHORS file.
+* When closing issues or pull requests, please reference the SHA in the closing
+ message (i.e. ``Thanks! Fixed in SHA: 6b93f6``). GitHub will automatically
+ link to it.
diff --git a/docs/creating_new_backends.rst b/docs/creating_new_backends.rst
new file mode 100644
index 0000000..df5551c
--- /dev/null
+++ b/docs/creating_new_backends.rst
@@ -0,0 +1,34 @@
+.. _ref-creating-new-backends:
+
+=====================
+Creating New Backends
+=====================
+
+The process should be fairly simple.
+
+#. Create a new backend file. The name is important.
+#. Implement two classes inside it.
+
+ #. SearchBackend (inherit from haystack.backends.BaseSearchBackend)
+ #. SearchQuery (inherit from haystack.backends.BaseSearchQuery)
+
+
+SearchBackend
+=============
+
+Responsible for the actual connection and low-level details of interacting with
+the backend.
+
+* Connects to search engine
+* Method for saving new docs to index
+* Method for removing docs from index
+* Method for performing the actual query
+
+
+SearchQuery
+===========
+
+Responsible for taking structured data about the query and converting it into a
+backend-appropriate format.
+
+* Method for creating the backend specific query - ``build_query``.
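+
+A very rough skeleton of these classes (plus the small ``BaseEngine`` wrapper
+that Haystack 2.x uses to tie them together) might look like the following;
+the method signatures shown are simplified and the bodies are placeholders::
+
+    # myengine_backend.py
+    from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery
+
+
+    class MyEngineSearchBackend(BaseSearchBackend):
+        def update(self, index, iterable, commit=True):
+            # Save/merge the prepared documents into the engine's index.
+            raise NotImplementedError
+
+        def remove(self, obj_or_string, commit=True):
+            # Delete a single document from the index.
+            raise NotImplementedError
+
+        def clear(self, models=None, commit=True):
+            # Wipe the index (optionally only for certain models).
+            raise NotImplementedError
+
+        def search(self, query_string, **kwargs):
+            # Run the query & return a dictionary of results.
+            raise NotImplementedError
+
+
+    class MyEngineSearchQuery(BaseSearchQuery):
+        def build_query(self):
+            # Convert the structured query data into engine-specific syntax.
+            raise NotImplementedError
+
+
+    class MyEngineEngine(BaseEngine):
+        backend = MyEngineSearchBackend
+        query = MyEngineSearchQuery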
diff --git a/docs/debugging.rst b/docs/debugging.rst
new file mode 100644
index 0000000..f2e2b4a
--- /dev/null
+++ b/docs/debugging.rst
@@ -0,0 +1,107 @@
+.. _ref-debugging:
+
+==================
+Debugging Haystack
+==================
+
+There are some common problems people run into when using Haystack for the first
+time. Some of the common problems and things to try appear below.
+
+.. note::
+
+ As a general suggestion, your best friend when debugging an issue is to
+ use the ``pdb`` library included with Python. By dropping a
+ ``import pdb; pdb.set_trace()`` in your code before the issue occurs, you
+ can step through and examine variables/logic as you progress. Make
+ sure you don't commit those ``pdb`` lines though.
+
+
+"No module named haystack."
+===========================
+
+This problem usually occurs when first adding Haystack to your project.
+
+* Are you using the ``haystack`` directory within your ``django-haystack``
+ checkout/install?
+* Is the ``haystack`` directory on your ``PYTHONPATH``? Alternatively, is
+ ``haystack`` symlinked into your project?
+* Start a Django shell (``./manage.py shell``) and try ``import haystack``.
+ You may receive a different, more descriptive error message.
+* Double-check to ensure you have no circular imports. (i.e. module A tries
+ importing from module B which is trying to import from module A.)
+
+
+"No results found." (On the web page)
+=====================================
+
+Several issues can cause no results to be found. Most commonly it is either
+not running a ``rebuild_index`` to populate your index or having a blank
+``document=True`` field, resulting in no content for the engine to search on.
+
+* Do you have a ``search_indexes.py`` located within an installed app?
+* Do you have data in your database?
+* Have you run a ``./manage.py rebuild_index`` to index all of your content?
+* Try running ``./manage.py rebuild_index -v2`` for more verbose output to
+ ensure data is being processed/inserted.
+* Start a Django shell (``./manage.py shell``) and try::
+
+      >>> from haystack.query import SearchQuerySet
+      >>> sqs = SearchQuerySet().all()
+      >>> sqs.count()
+
+* You should get back an integer > 0. If not, check the above and reindex. If
+  you do, continue by inspecting an individual result::
+
+      >>> sqs[0]       # Should get back a SearchResult object.
+      >>> sqs[0].id    # Should get something back like 'myapp.mymodel.1'.
+      >>> sqs[0].text  # ... or whatever your document=True field is.
+
+* If you get back either ``u''`` or ``None``, it means that your data isn't
+ making it into the main field that gets searched. You need to check that the
+ field either has a template that uses the model data, a ``model_attr`` that
+ pulls data directly from the model or a ``prepare/prepare_FOO`` method that
+ populates the data at index time.
+* Check the template for your search page and ensure it is looping over the
+ results properly. Also ensure that it's either accessing valid fields coming
+ back from the search engine or that it's trying to access the associated
+ model via the ``{{ result.object.foo }}`` lookup.
+
+
+"LockError: [Errno 17] File exists: '/path/to/whoosh_index/_MAIN_LOCK'"
+=======================================================================
+
+This is a Whoosh-specific traceback. It occurs when the Whoosh engine in one
+process/thread locks the index files for writing while another process/thread
+tries to access them. This is a common error when using ``RealtimeSignalProcessor``
+with Whoosh under any kind of load, which is why it's only recommended for
+small sites or development.
+
+The only real solution is to set up a cron job that runs
+``./manage.py rebuild_index`` (optionally with ``--age=24``) nightly
+(or however often you need) to refresh the search indexes. Then disable the
+use of the ``RealtimeSignalProcessor`` within your settings.
+
+The downside to this is that you lose real-time search. For many people, this
+isn't an issue and this will allow you to scale Whoosh up to much higher
+traffic. If this is not acceptable, you should investigate either the Solr or
+Xapian backends.
+
+
+"Failed to add documents to Solr: [Reason: None]"
+=================================================
+
+This is a Solr-specific traceback. It generally occurs when there is an error
+with your ``HAYSTACK_CONNECTIONS[<alias>]['URL']``. Since Solr acts as a webservice, you should
+test the URL in your web browser. If you receive an error, you may need to
+change your URL.
+
+This can also be caused when using old versions of pysolr (2.0.9 and before) with httplib2 and
+including a trailing slash in your ``HAYSTACK_CONNECTIONS[<alias>]['URL']``. If this applies to
+you, please upgrade to the current version of pysolr.
+
+
+"Got an unexpected keyword argument 'boost'"
+============================================
+
+This is a Solr-specific traceback. This can also be caused when using old
+versions of pysolr (2.0.12 and before). Please upgrade your version of
+pysolr (2.0.13+).
diff --git a/docs/faceting.rst b/docs/faceting.rst
new file mode 100644
index 0000000..f2e64f4
--- /dev/null
+++ b/docs/faceting.rst
@@ -0,0 +1,328 @@
+.. _ref-faceting:
+
+========
+Faceting
+========
+
+What Is Faceting?
+-----------------
+
+Faceting is a way to provide users with feedback about the number of documents
+which match terms they may be interested in. At its simplest, it gives
+document counts based on words in the corpus, date ranges, numeric ranges or
+even advanced queries.
+
+Faceting is particularly useful when trying to provide users with drill-down
+capabilities. The general workflow in this regard is:
+
+ #. You can choose what you want to facet on.
+ #. The search engine will return the counts it sees for that match.
+ #. You display those counts to the user and provide them with a link.
+ #. When the user chooses a link, you narrow the search query to only include
+ those conditions and display the results, potentially with further facets.
+
+.. note::
+
+ Faceting can be difficult, especially in providing the user with the right
+ number of options and/or the right areas to be able to drill into. This
+ is unique to every situation and demands following what real users need.
+
+ You may want to consider logging queries and looking at popular terms to
+ help you narrow down how you can help your users.
+
+Haystack provides functionality so that all of the above steps are possible.
+From the ground up, let's build a faceted search setup. This assumes that you
+have worked through the :doc:`tutorial` and have a working Haystack
+installation. The same setup from the :doc:`tutorial` applies here.
+
+1. Determine Facets And ``SearchQuerySet``
+------------------------------------------
+
+Determining what you want to facet on isn't always easy. For our purposes,
+we'll facet on the ``author`` field.
+
+In order to facet effectively, the search engine should store both a standard
+representation of your data as well as an exact version to facet on. This is
+generally accomplished by duplicating the field and storing it via two
+different types. Duplication is suggested so that those fields are still
+searchable in the standard ways.
+
+To inform Haystack of this, you simply pass along a ``faceted=True`` parameter
+on the field(s) you wish to facet on. So to modify our existing example::
+
+    class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+        text = indexes.CharField(document=True, use_template=True)
+        author = indexes.CharField(model_attr='user', faceted=True)
+        pub_date = indexes.DateTimeField(model_attr='pub_date')
+
+Haystack quietly handles all of the backend details for you, creating a similar
+field to the type you specified with ``_exact`` appended. Our example would now
+have both an ``author`` and an ``author_exact`` field, though this is largely an
+implementation detail.
+
+To pull faceting information out of the index, we'll use the
+``SearchQuerySet.facet`` method to set up the facet and the
+``SearchQuerySet.facet_counts`` method to retrieve back the counts seen.
+
+Experimenting in a shell (``./manage.py shell``) is a good way to get a feel
+for what various facets might look like::
+
+ >>> from haystack.query import SearchQuerySet
+ >>> sqs = SearchQuerySet().facet('author')
+ >>> sqs.facet_counts()
+ {
+ 'dates': {},
+ 'fields': {
+ 'author': [
+ ('john', 4),
+ ('daniel', 2),
+ ('sally', 1),
+ ('terry', 1),
+ ],
+ },
+ 'queries': {}
+ }
+
+.. note::
+
+ Note that, despite the duplication of fields, you should provide the
+ regular name of the field when faceting. Haystack will intelligently
+ handle the underlying details and mapping.
+
+As you can see, we get back a dictionary which provides access to the three
+types of facets available: ``fields``, ``dates`` and ``queries``. Since we only
+faceted on the ``author`` field (which actually facets on the ``author_exact``
+field managed by Haystack), only the ``fields`` key has any data
+associated with it. In this case, we have a corpus of eight documents with four
+unique authors.
+
+.. note::
+ Facets are chainable, like most ``SearchQuerySet`` methods. However, unlike
+ most ``SearchQuerySet`` methods, they are *NOT* affected by ``filter`` or
+ similar methods. The only method that has any effect on facets is the
+ ``narrow`` method (which is how you provide drill-down).
+
+Configuring facet behaviour
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+You can configure the behaviour of your facets by passing options
+for each facet in your SearchQuerySet. These options can be backend specific.
+
+**limit**
+*tested on Solr*
+
+The ``limit`` parameter limits the results for each query. On Solr, the default `facet.limit`_ is 100 and a
+negative number removes the limit.
+
+.. _facet.limit: https://wiki.apache.org/solr/SimpleFacetParameters#facet.limit
+
+Example usage::
+
+ >>> from haystack.query import SearchQuerySet
+ >>> sqs = SearchQuerySet().facet('author', limit=-1)
+ >>> sqs.facet_counts()
+ {
+ 'dates': {},
+ 'fields': {
+ 'author': [
+ ('abraham', 1),
+ ('benny', 2),
+ ('cindy', 1),
+ ('diana', 5),
+ ],
+ },
+ 'queries': {}
+ }
+
+ >>> sqs = SearchQuerySet().facet('author', limit=2)
+ >>> sqs.facet_counts()
+ {
+ 'dates': {},
+ 'fields': {
+ 'author': [
+ ('abraham', 1),
+ ('benny', 2),
+ ],
+ },
+ 'queries': {}
+ }
+
+**sort**
+*tested on Solr*
+
+The ``sort`` parameter will sort the results for each query. Solr's default
+`facet.sort`_ is ``index``, which will sort the facets alphabetically. Changing
+the parameter to ``count`` will sort the facets by the number of results for
+each facet value.
+
+.. _facet.sort: https://wiki.apache.org/solr/SimpleFacetParameters#facet.sort
+
+
+Example usage::
+
+ >>> from haystack.query import SearchQuerySet
+ >>> sqs = SearchQuerySet().facet('author', sort='index', )
+ >>> sqs.facet_counts()
+ {
+ 'dates': {},
+ 'fields': {
+ 'author': [
+ ('abraham', 1),
+ ('benny', 2),
+ ('cindy', 1),
+ ('diana', 5),
+ ],
+ },
+ 'queries': {}
+ }
+
+ >>> sqs = SearchQuerySet().facet('author', sort='count', )
+ >>> sqs.facet_counts()
+ {
+ 'dates': {},
+ 'fields': {
+ 'author': [
+ ('diana', 5),
+ ('benny', 2),
+ ('abraham', 1),
+ ('cindy', 1),
+ ],
+ },
+ 'queries': {}
+ }
+
+
+Now that we have the facet we want, it's time to implement it.
+
+2. Switch to the ``FacetedSearchView`` and ``FacetedSearchForm``
+----------------------------------------------------------------
+
+There are three things that we'll need to do to expose facets to our frontend.
+The first is to construct the ``SearchQuerySet`` we want to use. We should have
+that from the previous step. The second is to switch to the
+``FacetedSearchView``. This view is useful because it prepares the facet counts
+and provides them in the context as ``facets``.
+
+Optionally, the third step is to switch to the ``FacetedSearchForm``. As it
+currently stands, this is only useful if you want to provide drill-down, though
+it may provide more functionality in the future. We'll do it for the sake of
+having it in place but know that it's not required.
+
+In your URLconf, you'll need to switch to the ``FacetedSearchView``. Your
+URLconf should resemble::
+
+    from django.conf.urls import patterns, url
+    from haystack.forms import FacetedSearchForm
+    from haystack.query import SearchQuerySet
+    from haystack.views import FacetedSearchView
+
+
+    sqs = SearchQuerySet().facet('author')
+
+
+    urlpatterns = patterns('haystack.views',
+        url(r'^$', FacetedSearchView(form_class=FacetedSearchForm, searchqueryset=sqs), name='haystack_search'),
+    )
+
+The ``FacetedSearchView`` will now instantiate the ``FacetedSearchForm`` and use
+the ``SearchQuerySet`` we provided. Now, a ``facets`` variable will be present
+in the context. This is added in an overridden ``extra_context`` method.
+
+
+3. Display The Facets In The Template
+-------------------------------------
+
+Templating facets involves simply adding an extra bit of processing to display
+the facets (and optionally to link to provide drill-down). An example template
+might look like this::
+
+ <form method="get" action=".">
+ <table>
+ <tbody>
+ {{ form.as_table }}
+ <tr>
+ <td>&nbsp;</td>
+ <td><input type="submit" value="Search"></td>
+ </tr>
+ </tbody>
+ </table>
+ </form>
+
+ {% if query %}
+ <!-- Begin faceting. -->
+ <h2>By Author</h2>
+
+ <div>
+ <dl>
+ {% if facets.fields.author %}
+ <dt>Author</dt>
+ {# Provide only the top 5 authors #}
+ {% for author in facets.fields.author|slice:":5" %}
+ <dd><a href="{{ request.get_full_path }}&amp;selected_facets=author_exact:{{ author.0|urlencode }}">{{ author.0 }}</a> ({{ author.1 }})</dd>
+ {% endfor %}
+ {% else %}
+ <p>No author facets.</p>
+ {% endif %}
+ </dl>
+ </div>
+ <!-- End faceting -->
+
+ <!-- Display results... -->
+ {% for result in page.object_list %}
+ <div class="search_result">
+ <h3><a href="{{ result.object.get_absolute_url }}">{{ result.object.title }}</a></h3>
+
+ <p>{{ result.object.body|truncatewords:80 }}</p>
+ </div>
+ {% empty %}
+ <p>Sorry, no results found.</p>
+ {% endfor %}
+ {% endif %}
+
+Displaying the facets is a matter of looping through the facets you want and
+providing the UI to suit. The ``author.0`` is the facet text from the backend
+and the ``author.1`` is the facet count.
+
+4. Narrowing The Search
+-----------------------
+
+We've also set ourselves up for the last bit, the drill-down aspect. By
+appending on the ``selected_facets`` to the URLs, we're informing the
+``FacetedSearchForm`` that we want to narrow our results to only those
+containing the author we provided.
+
+For a concrete example, if the facets on author come back as::
+
+ {
+ 'dates': {},
+ 'fields': {
+ 'author': [
+ ('john', 4),
+ ('daniel', 2),
+ ('sally', 1),
+ ('terry', 1),
+ ],
+ },
+ 'queries': {}
+ }
+
+You should present a list similar to::
+
+ <ul>
+ <li><a href="/search/?q=Haystack&selected_facets=author_exact:john">john</a> (4)</li>
+ <li><a href="/search/?q=Haystack&selected_facets=author_exact:daniel">daniel</a> (2)</li>
+ <li><a href="/search/?q=Haystack&selected_facets=author_exact:sally">sally</a> (1)</li>
+ <li><a href="/search/?q=Haystack&selected_facets=author_exact:terry">terry</a> (1)</li>
+ </ul>
+
+.. warning::
+
+ Haystack can automatically handle most details around faceting. However,
+ since ``selected_facets`` is passed directly to narrow, it must use the
+ duplicated field name. Improvements to this are planned but incomplete.
+
+This is simply the default behavior but it is possible to override or provide
+your own form which does additional processing. You could also write your own
+faceted ``SearchView``, which could provide additional/different facets based
+on facets chosen. There is a wide range of possibilities available to help the
+user navigate your content.
diff --git a/docs/faq.rst b/docs/faq.rst
new file mode 100644
index 0000000..94fd0d3
--- /dev/null
+++ b/docs/faq.rst
@@ -0,0 +1,117 @@
+.. _ref-frequently-asked-questions:
+
+==============================
+(In)Frequently Asked Questions
+==============================
+
+
+What is Haystack?
+=================
+
+Haystack is meant to be a portable interface to a search engine of your choice.
+Some might call it a search framework, an abstraction layer or what have you.
+The idea is that you write your search code once and should be able to freely
+switch between backends as your situation necessitates.
+
+
+Why should I consider using Haystack?
+=====================================
+
+Haystack is targeted at the following use cases:
+
+* If you want to feature search on your site and search solutions like Google or
+ Yahoo search don't fit your needs.
+* If you want to be able to customize your search and search on more than just
+ the main content.
+* If you want to have features like drill-down (faceting) or "More Like This".
+* If you want an interface that is non-search-engine specific, allowing you to
+ change your mind later without much rewriting.
+
+
+When should I not be using Haystack?
+====================================
+
+* Non-Model-based data. If you just want to index random data (flat files,
+ alternate sources, etc.), Haystack isn't a good solution. Haystack is very
+ ``Model``-based and doesn't work well outside of that use case.
+* Ultra-high volume. Because of the very nature of Haystack (abstraction layer),
+ there's more overhead involved. This makes it portable, but as with all
+ abstraction layers, you lose a little performance. You also can't take full
+ advantage of the exact feature-set of your search engine. This is the price
+ of pluggable backends.
+
+
+Why was Haystack created when there are so many other search options?
+=====================================================================
+
+The proliferation of search options in Django is a relatively recent development
+and is actually one of the reasons for Haystack's existence. There are too
+many options that are only partial solutions or are too engine specific.
+
+Further, most use an unfamiliar API and documentation is lacking in most cases.
+
+Haystack is an attempt to unify these efforts into one solution. That's not to
+say there should be no alternatives, but Haystack should provide a good
+solution to 80%+ of the search use cases out there.
+
+
+What's the history behind Haystack?
+===================================
+
+Haystack started because of my frustration with the lack of good search options
+(before many other apps came out) and as the result of extensive use of
+Djangosearch. Djangosearch was a decent solution but had a number of
+shortcomings, such as:
+
+* Tied to the models.py, so you'd have to modify the source of third-party
+  (or django.contrib) apps in order to effectively use it.
+* All or nothing approach to indexes. So all indexes appear on all sites and
+ in all places.
+* Lack of tests.
+* Lack of documentation.
+* Uneven backend implementations.
+
+The initial idea was to simply fork Djangosearch and improve on these (and
+other issues). However, after stepping back, I decided to overhaul the entire
+API (and most of the underlying code) to be more representative of what I would
+want as an end-user. The result was starting afresh and reusing concepts (and
+some code) from Djangosearch as needed.
+
+As a result of this heritage, you can actually still find some portions of
+Djangosearch present in Haystack (especially in the ``SearchIndex`` and
+``SearchBackend`` classes) where it made sense. The original authors of
+Djangosearch are aware of this and thus far have seemed to be fine with this
+reuse.
+
+
+Why doesn't <search engine X> have a backend included in Haystack?
+==================================================================
+
+Several possibilities on this.
+
+#. Licensing
+
+ A common problem is that the Python bindings for a specific engine may
+ have been released under an incompatible license. The goal is for Haystack
+ to remain BSD licensed and importing bindings with an incompatible license
+ can technically convert the entire codebase to that license. This most
+ commonly occurs with GPL'ed bindings.
+
+#. Lack of time
+
+ The search engine in question may be on the list of backends to add and we
+ simply haven't gotten to it yet. We welcome patches for additional backends.
+
+#. Incompatible API
+
+ In order for an engine to work well with Haystack, a certain baseline set of
+ features is needed. This is often an issue when the engine doesn't support
+ ranged queries or additional attributes associated with a search record.
+
+#. We're not aware of the engine
+
+ If you think we may not be aware of the engine you'd like, please tell us
+ about it (preferably via the group -
+ http://groups.google.com/group/django-haystack/). Be sure to check through
+ the backends (in case it wasn't documented) and search the history on the
+ group to minimize duplicates.
diff --git a/docs/glossary.rst b/docs/glossary.rst
new file mode 100644
index 0000000..f6a1e6e
--- /dev/null
+++ b/docs/glossary.rst
@@ -0,0 +1,76 @@
+.. _ref-glossary:
+
+========
+Glossary
+========
+
+Search is a domain full of its own jargon and definitions. As this may be an
+unfamiliar territory to many developers, what follows are some commonly used
+terms and what they mean.
+
+
+Engine
+ An engine, for the purposes of Haystack, is a third-party search solution.
+ It might be a full service (e.g. Solr_) or a library to build an
+ engine with (e.g. Whoosh_).
+
+.. _Solr: http://lucene.apache.org/solr/
+.. _Whoosh: https://bitbucket.org/mchaput/whoosh/
+
+Index
+ The datastore used by the engine is called an index. Its structure can vary
+ wildly between engines but commonly they resemble a document store. This is
+ the source of all information in Haystack.
+
+Document
+ A document is essentially a record within the index. It usually contains at
+ least one blob of text that serves as the primary content the engine searches
+ and may have additional data hung off it.
+
+Corpus
+ A term for a collection of documents. When talking about the documents stored
+ by the engine (rather than the technical implementation of the storage), this
+ term is commonly used.
+
+Field
+ Within the index, each document may store extra data with the main content as
+ a field. Also sometimes called an attribute, this usually represents metadata
+ or extra content about the document. Haystack can use these fields for
+ filtering and display.
+
+Term
+ A term is generally a single word (or word-like) string of characters used
+ in a search query.
+
+Stemming
+ A means of determining if a word has any root words. This varies by language,
+ but in English, this generally consists of removing plurals, an action form of
+ the word, et cetera. For instance, in English, 'giraffes' would stem to
+ 'giraffe'. Similarly, 'exclamation' would stem to 'exclaim'. This is useful
+ for finding variants of the word that may appear in other documents.
+
+Boost
+ Boost provides a means to take a term or phrase from a search query and alter
+ the relevance of a result based on if that term is found in the result, a form
+ of weighting. For instance, if you wanted to more heavily weight results that
+ included the word 'zebra', you'd specify a boost for that term within the
+ query.
+
+More Like This
+ Incorporating techniques from information retrieval and artificial
+ intelligence, More Like This is a technique for finding other documents within
+ the index that closely resemble the document in question. This is useful for
+ programmatically generating a list of similar content for a user to browse
+ based on the current document they are viewing.
+
+Faceting
+ Faceting is a way to provide insight to the user into the contents of your
+ corpus. In its simplest form, it is a set of document counts returned with
+ results when performing a query. These counts can be used as feedback for
+ the user, allowing the user to choose interesting aspects of their search
+ results and "drill down" into those results.
+
+ An example might be providing a facet on an ``author`` field, providing back a
+ list of authors and the number of documents in the index they wrote. This
+ could be presented to the user with a link, allowing the user to click and
+ narrow their original search to all results by that author.
diff --git a/docs/haystack_theme/layout.html b/docs/haystack_theme/layout.html
new file mode 100644
index 0000000..e1d4ab3
--- /dev/null
+++ b/docs/haystack_theme/layout.html
@@ -0,0 +1,22 @@
+{% extends "basic/layout.html" %}
+
+{%- block extrahead %}
+ <link rel="stylesheet" href="http://haystacksearch.org/css/front.css" media="screen">
+ <link rel="stylesheet" href="_static/documentation.css" media="screen">
+{% endblock %}
+
+{%- block header %}
+ <div id="header">
+ <h1>Haystack</h1>
+ <p>Modular search for Django</p>
+
+ <ul class="features">
+ <li>Term Boost</li>
+ <li>More Like This</li>
+ <li>Faceting</li>
+ <li>Stored (non-indexed) fields</li>
+ <li>Highlighting</li>
+ <li>Spelling Suggestions</li>
+ </ul>
+ </div>
+{% endblock %} \ No newline at end of file
diff --git a/docs/haystack_theme/static/documentation.css b/docs/haystack_theme/static/documentation.css
new file mode 100644
index 0000000..3e9492c
--- /dev/null
+++ b/docs/haystack_theme/static/documentation.css
@@ -0,0 +1,29 @@
+a, a:link, a:hover { background-color: transparent !important; color: #CAECFF; outline-color: transparent !important; text-decoration: underline; }
+dl dt { text-decoration: underline; }
+dl.class dt, dl.method dt { background-color: #444444; padding: 5px; text-decoration: none; }
+tt.descname { font-weight: normal; }
+dl.method dt span.optional { font-weight: normal; }
+div#header { margin-bottom: 0px; }
+div.document, div.related, div.footer { width: 900px; margin: 0 auto; }
+div.document { margin-top: 10px; }
+div.related { background-color: #262511; padding-left: 10px; padding-right: 10px; }
+div.documentwrapper { width:640px; float:left;}
+div.body h1,
+div.body h2,
+div.body h3,
+div.body h4,
+div.body h5,
+div.body h6 {
+ background-color: #053211;
+ font-weight: normal;
+ border-bottom: 2px solid #262511;
+ margin: 20px -20px 10px -20px;
+ padding: 3px 0 3px 10px;
+}
+div.sphinxsidebar { width:220px; float:right;}
+div.sphinxsidebar ul { padding-left: 10px; }
+div.sphinxsidebar ul ul { padding-left: 10px; margin-left: 10px; }
+div.bodywrapper { margin: 0px; }
+div.highlight-python, div.highlight { background-color: #262511; margin-bottom: 10px; padding: 10px; }
+div.footer { background-color:#262511; font-size: 90%; padding: 10px; }
+table thead { background-color: #053211; border-bottom: 1px solid #262511; } \ No newline at end of file
diff --git a/docs/haystack_theme/theme.conf b/docs/haystack_theme/theme.conf
new file mode 100644
index 0000000..3161b4d
--- /dev/null
+++ b/docs/haystack_theme/theme.conf
@@ -0,0 +1,2 @@
+[theme]
+inherit = basic \ No newline at end of file
diff --git a/docs/highlighting.rst b/docs/highlighting.rst
new file mode 100644
index 0000000..5c95619
--- /dev/null
+++ b/docs/highlighting.rst
@@ -0,0 +1,77 @@
+.. _ref-highlighting:
+
+============
+Highlighting
+============
+
+Haystack supports two different methods of highlighting. You can either use
+``SearchQuerySet.highlight`` or the built-in ``{% highlight %}`` template tag,
+which uses the ``Highlighter`` class. Each approach has advantages and
+disadvantages you need to weigh when deciding which to use.
+
+If you want portable, flexible, decently fast code, the
+``{% highlight %}`` template tag (or manually using the underlying
+``Highlighter`` class) is the way to go. On the other hand, if you care more
+about speed and will only ever be using one backend,
+``SearchQuerySet.highlight`` may suit your needs better.
+
+Use of ``SearchQuerySet.highlight`` is documented in the
+:doc:`searchqueryset_api` documentation and the ``{% highlight %}`` tag is
+covered in the :doc:`templatetags` documentation, so the rest of this material
+will cover the ``Highlighter`` implementation.
+
+
+``Highlighter``
+---------------
+
+The ``Highlighter`` class is a pure-Python implementation included with Haystack
+that's designed for flexibility. If you use the ``{% highlight %}`` template
+tag, you'll be automatically using this class. You can also use it manually in
+your code. For example::
+
+ >>> from haystack.utils import Highlighter
+
+ >>> my_text = 'This is a sample block that would be more meaningful in real life.'
+ >>> my_query = 'block meaningful'
+
+ >>> highlight = Highlighter(my_query)
+ >>> highlight.highlight(my_text)
+ u'...<span class="highlighted">block</span> that would be more <span class="highlighted">meaningful</span> in real life.'
+
+The default implementation takes three optional kwargs: ``html_tag``,
+``css_class`` and ``max_length``. These allow for basic customizations to the
+output, like so::
+
+ >>> from haystack.utils import Highlighter
+
+ >>> my_text = 'This is a sample block that would be more meaningful in real life.'
+ >>> my_query = 'block meaningful'
+
+ >>> highlight = Highlighter(my_query, html_tag='div', css_class='found', max_length=35)
+ >>> highlight.highlight(my_text)
+ u'...<div class="found">block</div> that would be more <div class="found">meaningful</div>...'
+
+Further, if this implementation doesn't suit your needs, you can define your own
+custom highlighter class. As long as it implements the API you've just seen, it
+can highlight however you choose. For example::
+
+    # In ``myapp/utils.py``...
+    from haystack.utils import Highlighter
+
+    class BorkHighlighter(Highlighter):
+        def render_html(self, highlight_locations=None, start_offset=None, end_offset=None):
+            highlighted_chunk = self.text_block[start_offset:end_offset]
+
+            for word in self.query_words:
+                highlighted_chunk = highlighted_chunk.replace(word, 'Bork!')
+
+            return highlighted_chunk
+
+Then set the ``HAYSTACK_CUSTOM_HIGHLIGHTER`` setting to
+``myapp.utils.BorkHighlighter``. Usage would then look like::
+
+ >>> highlight = BorkHighlighter(my_query)
+ >>> highlight.highlight(my_text)
+ u'Bork! that would be more Bork! in real life.'
+
+Now the ``{% highlight %}`` template tag will also use this highlighter.
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..9ca34eb
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,117 @@
+Welcome to Haystack!
+====================
+
+Haystack provides modular search for Django. It features a unified, familiar
+API that allows you to plug in different search backends (such as Solr_,
+Elasticsearch_, Whoosh_, Xapian_, etc.) without having to modify your code.
+
+.. _Solr: http://lucene.apache.org/solr/
+.. _Elasticsearch: http://elasticsearch.org/
+.. _Whoosh: https://bitbucket.org/mchaput/whoosh/
+.. _Xapian: http://xapian.org/
+
+
+.. note::
+
+ This documentation represents Haystack 2.x. For old versions of the documentation: `1.2`_, `1.1`_.
+
+.. _`1.2`: http://django-haystack.readthedocs.org/en/v1.2.6/index.html
+.. _`1.1`: http://django-haystack.readthedocs.org/en/v1.1/index.html
+
+Getting Started
+---------------
+
+If you're new to Haystack, you may want to start with these documents to get
+you up and running:
+
+.. toctree::
+ :maxdepth: 2
+
+ tutorial
+
+.. toctree::
+ :maxdepth: 1
+
+ views_and_forms
+ templatetags
+ glossary
+ management_commands
+ faq
+ who_uses
+ other_apps
+ installing_search_engines
+ debugging
+
+ migration_from_1_to_2
+ python3
+ contributing
+
+
+Advanced Uses
+-------------
+
+Once you've got Haystack working, here are some of the more complex features
+you may want to include in your application.
+
+.. toctree::
+ :maxdepth: 1
+
+ best_practices
+ highlighting
+ faceting
+ autocomplete
+ boost
+ signal_processors
+ multiple_index
+ rich_content_extraction
+ spatial
+ admin
+
+
+Reference
+---------
+
+If you're an experienced user and are looking for a reference, you may be
+looking for API documentation and advanced usage as detailed in:
+
+.. toctree::
+ :maxdepth: 2
+
+ searchqueryset_api
+ searchindex_api
+ inputtypes
+ searchfield_api
+ searchresult_api
+ searchquery_api
+ searchbackend_api
+
+ architecture_overview
+ backend_support
+ settings
+ utils
+
+
+Developing
+----------
+
+Finally, if you're looking to help out with the development of Haystack,
+the following links should help guide you on running tests and creating
+additional backends:
+
+.. toctree::
+ :maxdepth: 1
+
+ running_tests
+ creating_new_backends
+
+
+Requirements
+------------
+
+Haystack has a relatively easily-met set of requirements.
+
+* Python 2.7+ or Python 3.3+
+* Django 1.6+
+
+Additionally, each backend has its own requirements. You should refer to
+:doc:`installing_search_engines` for more details.
diff --git a/docs/inputtypes.rst b/docs/inputtypes.rst
new file mode 100644
index 0000000..fe839e6
--- /dev/null
+++ b/docs/inputtypes.rst
@@ -0,0 +1,177 @@
+.. _ref-inputtypes:
+
+===========
+Input Types
+===========
+
+Input types allow you to specify more advanced query behavior. They serve as a
+way to alter the query, often in backend-specific ways, without altering your
+Python code, and they enable the use of more advanced features.
+
+Input types currently are only useful with the ``filter/exclude`` methods on
+``SearchQuerySet``. Expanding this support to other methods is on the roadmap.
+
+
+Available Input Types
+=====================
+
+Included with Haystack are the following input types:
+
+``Raw``
+-------
+
+.. class:: haystack.inputs.Raw
+
+Raw allows you to specify backend-specific query syntax. If Haystack doesn't
+provide a way to access special query functionality, you can make use of this
+input type to pass it along.
+
+Example::
+
+ # Fielded.
+ sqs = SearchQuerySet().filter(author=Raw('daniel OR jones'))
+
+ # Non-fielded.
+ # See ``AltParser`` for a better way to construct this.
+ sqs = SearchQuerySet().filter(content=Raw('{!dismax qf=author mm=1}haystack'))
+
+
+``Clean``
+---------
+
+.. class:: haystack.inputs.Clean
+
+``Clean`` takes standard user (untrusted) input and sanitizes it. It ensures
+that no unintended operators or special characters make it into the query.
+
+This is roughly analogous to Django's ``autoescape`` support.
+
+.. note::
+
+ By default, if you hand a ``SearchQuerySet`` a bare string, it will get
+ wrapped in this class.
+
+Example::
+
+ # This becomes "daniel or jones".
+ sqs = SearchQuerySet().filter(content=Clean('daniel OR jones'))
+
+ # Things like ``:`` & ``/`` get escaped.
+ sqs = SearchQuerySet().filter(url=Clean('http://www.example.com'))
+
+ # Equivalent (automatically wrapped in ``Clean``).
+ sqs = SearchQuerySet().filter(url='http://www.example.com')
+
+
+``Exact``
+---------
+
+.. class:: haystack.inputs.Exact
+
+``Exact`` allows for making sure a phrase is exactly matched, unlike the usual
+``AND`` lookups, where words may be far apart.
+
+Example::
+
+ sqs = SearchQuerySet().filter(author=Exact('n-gram support'))
+
+ # Equivalent.
+ sqs = SearchQuerySet().filter(author__exact='n-gram support')
+
+
+``Not``
+-------
+
+.. class:: haystack.inputs.Not
+
+``Not`` allows negation of the query fragment it wraps. As ``Not`` is a subclass
+of ``Clean``, it will also sanitize the query.
+
+This is generally only used internally. Most people prefer to use the
+``SearchQuerySet.exclude`` method.
+
+Example::
+
+ sqs = SearchQuerySet().filter(author=Not('daniel'))
+
+
+``AutoQuery``
+-------------
+
+.. class:: haystack.inputs.AutoQuery
+
+``AutoQuery`` takes a more complex user query (one that includes simple, standard
+query syntax bits) & forms a proper query out of it. It also handles
+sanitizing that query using ``Clean`` to ensure the query doesn't break.
+
+``AutoQuery`` accommodates regular words, negated (``NOT``-ed) words &
+exact phrases.
+
+Example::
+
+ # Against the main text field with an accidental ":" before "search".
+ # Generates a query like ``haystack (NOT whoosh) "fast search"``
+ sqs = SearchQuerySet().filter(content=AutoQuery('haystack -whoosh "fast :search"'))
+
+ # Equivalent.
+ sqs = SearchQuerySet().auto_query('haystack -whoosh "fast :search"')
+
+ # Fielded.
+ sqs = SearchQuerySet().filter(author=AutoQuery('daniel -day -lewis'))
+
+
+``AltParser``
+-------------
+
+.. class:: haystack.inputs.AltParser
+
+``AltParser`` lets you specify that a portion of the query should use a
+separate parser in the search engine. This is search-engine-specific, so it may
+decrease the portability of your app.
+
+Currently only supported under Solr.
+
+Example::
+
+ # DisMax.
+ sqs = SearchQuerySet().filter(content=AltParser('dismax', 'haystack', qf='text', mm=1))
+
+ # Prior to the spatial support, you could do...
+ sqs = SearchQuerySet().filter(content=AltParser('dismax', 'haystack', qf='author', mm=1))
+
+
+Creating Your Own Input Types
+=============================
+
+Building your own input type is relatively simple. All input types are simple
+classes that provide an ``__init__`` & a ``prepare`` method.
+
+The ``__init__`` may accept any ``args/kwargs``, though typical use just
+involves a query string.
+
+The ``prepare`` method lets you alter the query the user provided before it
+becomes part of the main query. It is lazy, called as late as possible, right
+before the final query is built & shipped to the engine.
+
+A full, if somewhat silly, example looks like::
+
+ from haystack.inputs import Clean
+
+
+ class NoShoutCaps(Clean):
+ input_type_name = 'no_shout_caps'
+ # This is the default & doesn't need to be specified.
+ post_process = True
+
+ def __init__(self, query_string, **kwargs):
+ # Stash the original, if you need it.
+ self.original = query_string
+ super(NoShoutCaps, self).__init__(query_string, **kwargs)
+
+ def prepare(self, query_obj):
+ # We need a reference to the current ``SearchQuery`` object this
+ # will run against, in case we need backend-specific code.
+ query_string = super(NoShoutCaps, self).prepare(query_obj)
+
+ # Take that, capital letters!
+ return query_string.lower()
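+
+Once defined, a custom input type is used just like the built-in ones shown
+above. A minimal usage sketch, assuming the class above lives in a hypothetical
+``myapp/inputs.py`` module (the query string is arbitrary)::
+
+    from haystack.query import SearchQuerySet
+
+    from myapp.inputs import NoShoutCaps
+
+    sqs = SearchQuerySet().filter(content=NoShoutCaps('STOP SHOUTING, PLEASE'))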
diff --git a/docs/installing_search_engines.rst b/docs/installing_search_engines.rst
new file mode 100644
index 0000000..e9599a0
--- /dev/null
+++ b/docs/installing_search_engines.rst
@@ -0,0 +1,222 @@
+.. _ref-installing-search-engines:
+
+=========================
+Installing Search Engines
+=========================
+
+Solr
+====
+
+Official Download Location: http://www.apache.org/dyn/closer.cgi/lucene/solr/
+
+Solr is Java but comes in a pre-packaged form that requires very little other
+than the JRE and Jetty. It's very performant and has an advanced feature set.
+Haystack suggests using Solr 3.5+, though it's possible to get it working on
+Solr 1.4 with a little effort. Installation is relatively simple::
+
+ curl -LO https://archive.apache.org/dist/lucene/solr/4.10.2/solr-4.10.2.tgz
+ tar xvzf solr-4.10.2.tgz
+ cd solr-4.10.2
+ cd example
+ java -jar start.jar
+
+You'll need to revise your schema. You can generate this from your application
+(once Haystack is installed and setup) by running
+``./manage.py build_solr_schema``. Take the output from that command and place
+it in ``solr-4.10.2/example/solr/collection1/conf/schema.xml``. Then restart Solr.
+
+.. note::
+ ``build_solr_schema`` uses a template to generate ``schema.xml``. Haystack
+ provides a default template using some sensible defaults. If you would like
+ to provide your own template, you will need to place it in
+ ``search_configuration/solr.xml``, inside a directory specified by your app's
+ ``TEMPLATE_DIRS`` setting. Examples::
+
+ /myproj/myapp/templates/search_configuration/solr.xml
+ # ...or...
+ /myproj/templates/search_configuration/solr.xml
+
+You'll also need a Solr binding, ``pysolr``. The official ``pysolr`` package,
+distributed via PyPI, is the best version to use (2.1.0+). Place ``pysolr.py``
+somewhere on your ``PYTHONPATH``.
+
+.. note::
+
+ ``pysolr`` has its own dependencies that aren't covered by Haystack. See
+ https://pypi.python.org/pypi/pysolr for the latest documentation.
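+
+With Solr running and ``pysolr`` installed, a minimal Haystack connection for
+this backend might look like the following sketch. The URL is an assumption
+based on the stock example server started above (Jetty's default port is
+8983); adjust it to match your own core::
+
+    HAYSTACK_CONNECTIONS = {
+        'default': {
+            'ENGINE': 'haystack.backends.solr_backend.SolrEngine',
+            # Point this at your own Solr core/URL.
+            'URL': 'http://127.0.0.1:8983/solr',
+        },
+    }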
+
+More Like This
+--------------
+
+To enable the "More Like This" functionality in Haystack, you'll need
+to enable the ``MoreLikeThisHandler``. Add the following line to your
+``solrconfig.xml`` file within the ``config`` tag::
+
+ <requestHandler name="/mlt" class="solr.MoreLikeThisHandler" />
+
+Spelling Suggestions
+--------------------
+
+To enable the spelling suggestion functionality in Haystack, you'll need to
+enable the ``SpellCheckComponent``.
+
+The first thing to do is create a special field on your ``SearchIndex`` class
+that mirrors the ``text`` field, but uses ``FacetCharField``. This disables
+the post-processing that Solr does, which can mess up your suggestions.
+Something like the following is suggested::
+
+ class MySearchIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ # ... normal fields then...
+ suggestions = indexes.FacetCharField()
+
+ def prepare(self, obj):
+ prepared_data = super(MySearchIndex, self).prepare(obj)
+ prepared_data['suggestions'] = prepared_data['text']
+ return prepared_data
+
+Then, you enable it in Solr by adding the following line to your
+``solrconfig.xml`` file within the ``config`` tag::
+
+ <searchComponent name="spellcheck" class="solr.SpellCheckComponent">
+
+ <str name="queryAnalyzerFieldType">textSpell</str>
+
+ <lst name="spellchecker">
+ <str name="name">default</str>
+ <str name="field">suggestions</str>
+ <str name="spellcheckIndexDir">./spellchecker1</str>
+ <str name="buildOnCommit">true</str>
+ </lst>
+ </searchComponent>
+
+Then change your default handler from::
+
+ <requestHandler name="standard" class="solr.StandardRequestHandler" default="true" />
+
+...to::
+
+ <requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
+ <arr name="last-components">
+ <str>spellcheck</str>
+ </arr>
+ </requestHandler>
+
+Be warned that the ``<str name="field">suggestions</str>`` portion will be specific to
+your ``SearchIndex`` classes (in this case, assuming the main field is called
+``text``).
+
+
+Elasticsearch
+=============
+
+Official Download Location: http://www.elasticsearch.org/download/
+
+Elasticsearch is Java but comes in a pre-packaged form that requires very
+little other than the JRE. It's also very performant, scales easily and has
+an advanced feature set. Haystack requires version 0.90.0 or later.
+Installation is best done using a package manager::
+
+ # On Mac OS X...
+ brew install elasticsearch
+
+ # On Ubuntu...
+ apt-get install elasticsearch
+
+ # Then start via:
+ elasticsearch -f -D es.config=<path to YAML config>
+
+ # Example:
+ elasticsearch -f -D es.config=/usr/local/Cellar/elasticsearch/0.90.0/config/elasticsearch.yml
+
+You may have to alter the configuration to run on ``localhost`` when developing
+locally. Modifications should be done in a YAML file, the stock one being
+``config/elasticsearch.yml``::
+
+ # Unicast Discovery (disable multicast)
+ discovery.zen.ping.multicast.enabled: false
+ discovery.zen.ping.unicast.hosts: ["127.0.0.1"]
+
+ # Name your cluster here to whatever.
+ # My machine is called "Venus", so...
+ cluster:
+ name: venus
+
+ network:
+ host: 127.0.0.1
+
+ path:
+ logs: /usr/local/var/log
+ data: /usr/local/var/data
+
+You'll also need an Elasticsearch binding: elasticsearch-py_ (**NOT**
+``pyes``). Place ``elasticsearch`` somewhere on your ``PYTHONPATH``
+(usually ``python setup.py install`` or ``pip install elasticsearch``).
+
+.. _elasticsearch-py: http://pypi.python.org/pypi/elasticsearch/
+
+.. note::
+
+ Elasticsearch 1.0 is slightly backwards incompatible so you need to make sure
+ you have the proper version of `elasticsearch-py` installed - releases with
+ major version 1 (1.X.Y) are to be used with Elasticsearch 1.0 and later, 0.4
+ releases are meant to work with Elasticsearch 0.90.X.
+
+.. note::
+
+ ``elasticsearch`` has its own dependencies that aren't covered by
+ Haystack. You'll also need ``urllib3``.
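+
+With the server and the ``elasticsearch`` library installed, a minimal
+connection sketch might look like the following. The URL assumes a local
+server on the default port 9200 and the ``INDEX_NAME`` is an arbitrary
+example value::
+
+    HAYSTACK_CONNECTIONS = {
+        'default': {
+            'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
+            'URL': 'http://127.0.0.1:9200/',
+            # The name of the index Haystack should create & use.
+            'INDEX_NAME': 'haystack',
+        },
+    }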
+
+
+Whoosh
+======
+
+Official Download Location: http://bitbucket.org/mchaput/whoosh/
+
+Whoosh is pure Python, so it's a great option for getting started quickly and
+for development, though it does work for small-scale live deployments. The
+current recommended version is 1.3.1+. You can install via PyPI_ using
+``sudo easy_install whoosh`` or ``sudo pip install whoosh``.
+
+Note that, while otherwise capable, the Whoosh backend does not currently
+support "More Like This" or faceting. Support for these features has recently
+been added to Whoosh itself & may appear in a future Haystack release.
+
+.. _PyPI: http://pypi.python.org/pypi/Whoosh/
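+
+A minimal connection sketch for the Whoosh backend might look like the
+following; the ``PATH`` value is only an example and can be any writable
+directory::
+
+    import os
+
+    HAYSTACK_CONNECTIONS = {
+        'default': {
+            'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
+            # Any writable location works; this one sits next to settings.py.
+            'PATH': os.path.join(os.path.dirname(__file__), 'whoosh_index'),
+        },
+    }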
+
+
+Xapian
+======
+
+Official Download Location: http://xapian.org/download
+
+Xapian is written in C++ so it requires compilation (unless your OS has a
+package for it). Installation looks like::
+
+ curl -O http://oligarchy.co.uk/xapian/1.2.18/xapian-core-1.2.18.tar.xz
+ curl -O http://oligarchy.co.uk/xapian/1.2.18/xapian-bindings-1.2.18.tar.xz
+
+ unxz xapian-core-1.2.18.tar.xz
+ unxz xapian-bindings-1.2.18.tar.xz
+
+ tar xvf xapian-core-1.2.18.tar
+ tar xvf xapian-bindings-1.2.18.tar
+
+ cd xapian-core-1.2.18
+ ./configure
+ make
+ sudo make install
+
+ cd ..
+ cd xapian-bindings-1.2.18
+ ./configure
+ make
+ sudo make install
+
+Xapian is a third-party supported backend. It is not included in Haystack
+proper due to licensing. To use it, you need both Haystack itself as well as
+``xapian-haystack``. You can download the source from
+http://github.com/notanumber/xapian-haystack/tree/master. Installation
+instructions can be found on that page as well. The backend, written
+by David Sauve (notanumber), fully implements the ``SearchQuerySet`` API and is
+an excellent alternative to Solr.
diff --git a/docs/management_commands.rst b/docs/management_commands.rst
new file mode 100644
index 0000000..e167923
--- /dev/null
+++ b/docs/management_commands.rst
@@ -0,0 +1,201 @@
+.. _ref-management-commands:
+
+===================
+Management Commands
+===================
+
+Haystack comes with several management commands to make working with Haystack
+easier.
+
+
+``clear_index``
+===============
+
+The ``clear_index`` command wipes out your entire search index. Use with
+caution. In addition to the standard management command options, it accepts the
+following arguments::
+
+ ``--noinput``:
+ If provided, the interactive prompts are skipped and the index is
+    unceremoniously wiped out.
+ ``--verbosity``:
+ Accepted but ignored.
+ ``--using``:
+ If provided, determines which connection should be used. Default is
+ ``default``.
+ ``--nocommit``:
+ If provided, it will pass commit=False to the backend. This means that the
+ update will not become immediately visible and will depend on another explicit commit
+ or the backend's commit strategy to complete the update.
+
+By default, this is an **INTERACTIVE** command and assumes that you do **NOT**
+wish to delete the entire index.
+
+.. note::
+
+ The ``--nocommit`` argument is only supported by the Solr backend.
+
+.. warning::
+
+ Depending on the backend you're using, this may simply delete the entire
+ directory, so be sure your ``HAYSTACK_CONNECTIONS[<alias>]['PATH']`` setting is correctly
+ pointed at just the index directory.
+
+
+``update_index``
+================
+
+.. note::
+
+ If you use the ``--start/--end`` flags on this command, you'll need to
+ install dateutil_ to handle the datetime parsing.
+
+ .. _dateutil: http://pypi.python.org/pypi/python-dateutil/1.5
+
+The ``update_index`` command will freshen all of the content in your index. It
+iterates through all indexed models and updates the records in the index. In
+addition to the standard management command options, it accepts the following
+arguments::
+
+ ``--age``:
+ Number of hours back to consider objects new. Useful for nightly
+ reindexes (``--age=24``). Requires ``SearchIndexes`` to implement
+ the ``get_updated_field`` method. Default is ``None``.
+ ``--start``:
+    The start date to index from. Can be any dateutil-parsable string,
+ recommended to be YYYY-MM-DDTHH:MM:SS. Requires ``SearchIndexes`` to
+ implement the ``get_updated_field`` method. Default is ``None``.
+ ``--end``:
+    The end date to index up to. Can be any dateutil-parsable string,
+ recommended to be YYYY-MM-DDTHH:MM:SS. Requires ``SearchIndexes`` to
+ implement the ``get_updated_field`` method. Default is ``None``.
+ ``--batch-size``:
+ Number of items to index at once. Default is 1000.
+ ``--remove``:
+ Remove objects from the index that are no longer present in the
+ database.
+ ``--workers``:
+    Allows for the use of multiple workers to parallelize indexing. Requires
+    ``multiprocessing``.
+ ``--verbosity``:
+ If provided, dumps out more information about what's being done.
+
+ * ``0`` = No output
+ * ``1`` = Minimal output describing what models were indexed
+ and how many records.
+ * ``2`` = Full output, including everything from ``1`` plus output
+ on each batch that is indexed, which is useful when debugging.
+ ``--using``:
+ If provided, determines which connection should be used. Default is
+ ``default``.
+ ``--nocommit``:
+ If provided, it will pass commit=False to the backend. This means that the
+ updates will not become immediately visible and will depend on another explicit commit
+ or the backend's commit strategy to complete the update.
+
+.. note::
+
+ The ``--nocommit`` argument is only supported by the Solr and Elasticsearch backends.
+
+Examples::
+
+ # Update everything.
+ ./manage.py update_index --settings=settings.prod
+
+ # Update everything with lots of information about what's going on.
+ ./manage.py update_index --settings=settings.prod --verbosity=2
+
+ # Update everything, cleaning up after deleted models.
+ ./manage.py update_index --remove --settings=settings.prod
+
+ # Update everything changed in the last 2 hours.
+ ./manage.py update_index --age=2 --settings=settings.prod
+
+ # Update everything between Dec. 1, 2011 & Dec 31, 2011
+ ./manage.py update_index --start='2011-12-01T00:00:00' --end='2011-12-31T23:59:59' --settings=settings.prod
+
+ # Update just a couple apps.
+ ./manage.py update_index blog auth comments --settings=settings.prod
+
+ # Update just a single model (in a complex app).
+ ./manage.py update_index auth.User --settings=settings.prod
+
+ # Crazy Go-Nuts University
+    ./manage.py update_index events.Event media news.Story --start='2011-01-01T00:00:00' --remove --using=hotbackup --workers=12 --verbosity=2 --settings=settings.prod
+
+.. note::
+
+ This command *ONLY* updates records in the index. It does *NOT* handle
+ deletions unless the ``--remove`` flag is provided. You might consider
+ a queue consumer if the memory requirements for ``--remove`` don't
+ fit your needs. Alternatively, you can use the
+ ``RealtimeSignalProcessor``, which will automatically handle deletions.
+
+
+``rebuild_index``
+=================
+
+A shortcut for ``clear_index`` followed by ``update_index``. It accepts any/all
+of the following arguments::
+
+ ``--age``:
+ Number of hours back to consider objects new. Useful for nightly
+ reindexes (``--age=24``). Requires ``SearchIndexes`` to implement
+ the ``get_updated_field`` method.
+ ``--batch-size``:
+ Number of items to index at once. Default is 1000.
+ ``--site``:
+ The site object to use when reindexing (like `search_sites.mysite`).
+ ``--noinput``:
+ If provided, the interactive prompts are skipped and the index is
+    unceremoniously wiped out.
+ ``--remove``:
+ Remove objects from the index that are no longer present in the
+ database.
+ ``--verbosity``:
+ If provided, dumps out more information about what's being done.
+
+ * ``0`` = No output
+ * ``1`` = Minimal output describing what models were indexed
+ and how many records.
+ * ``2`` = Full output, including everything from ``1`` plus output
+ on each batch that is indexed, which is useful when debugging.
+ ``--using``:
+ If provided, determines which connection should be used. Default is
+ ``default``.
+ ``--nocommit``:
+ If provided, it will pass commit=False to the backend. This means that the
+ update will not become immediately visible and will depend on another explicit commit
+ or the backend's commit strategy to complete the update.
+
+For when you really, really want a completely rebuilt index.
+
+
+``build_solr_schema``
+=====================
+
+Once all of your ``SearchIndex`` classes are in place, this command can be used
+to generate the XML schema Solr needs to handle the search data. It accepts the
+following arguments::
+
+ ``--filename``:
+ If provided, directs output to a file instead of stdout.
+ ``--using``:
+ If provided, determines which connection should be used. Default is
+ ``default``.
+
+.. warning::
+
+ This command does NOT update the ``schema.xml`` file for you. You either
+ have to specify a ``filename`` flag or have to
+ copy-paste (or redirect) the output to the correct file. Haystack has no
+    way of knowing where your Solr is set up (or if it's even on the same
+ machine), hence the manual step.
+
+
+``haystack_info``
+=================
+
+Provides some basic information about how Haystack is set up and what models it
+is handling. It accepts no arguments. Useful when debugging or when using
+Haystack-enabled third-party apps.
diff --git a/docs/migration_from_1_to_2.rst b/docs/migration_from_1_to_2.rst
new file mode 100644
index 0000000..6159e06
--- /dev/null
+++ b/docs/migration_from_1_to_2.rst
@@ -0,0 +1,285 @@
+.. _ref-migration_from_1_to_2:
+
+===========================================
+Migrating From Haystack 1.X to Haystack 2.X
+===========================================
+
+Haystack introduced several backward-incompatible changes in the process of
+moving from the 1.X series to the 2.X series. These were done to clean up the
+API, to support new features & to clean up problems in 1.X. At a high level,
+they consisted of:
+
+* The removal of ``SearchSite`` & ``haystack.site``.
+* The removal of ``handle_registrations`` & ``autodiscover``.
+* The addition of multiple index support.
+* The addition of ``SignalProcessors`` & the removal of ``RealTimeSearchIndex``.
+* The removal/renaming of various settings.
+
+This guide will help you make the changes needed to be compatible with Haystack
+2.X.
+
+
+Settings
+========
+
+Most prominently, the old way of specifying a backend & its settings has changed
+to support the multiple index feature. A complete Haystack 1.X example might
+look like::
+
+ HAYSTACK_SEARCH_ENGINE = 'solr'
+ HAYSTACK_SOLR_URL = 'http://localhost:9001/solr/default'
+ HAYSTACK_SOLR_TIMEOUT = 60 * 5
+ HAYSTACK_INCLUDE_SPELLING = True
+ HAYSTACK_BATCH_SIZE = 100
+
+ # Or...
+ HAYSTACK_SEARCH_ENGINE = 'whoosh'
+ HAYSTACK_WHOOSH_PATH = '/home/search/whoosh_index'
+ HAYSTACK_WHOOSH_STORAGE = 'file'
+ HAYSTACK_WHOOSH_POST_LIMIT = 128 * 1024 * 1024
+ HAYSTACK_INCLUDE_SPELLING = True
+ HAYSTACK_BATCH_SIZE = 100
+
+ # Or...
+ HAYSTACK_SEARCH_ENGINE = 'xapian'
+ HAYSTACK_XAPIAN_PATH = '/home/search/xapian_index'
+ HAYSTACK_INCLUDE_SPELLING = True
+ HAYSTACK_BATCH_SIZE = 100
+
+In Haystack 2.X, you can now supply as many backends as you like, so all of the
+above settings can now be active at the same time. A translated set of settings
+would look like::
+
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.solr_backend.SolrEngine',
+ 'URL': 'http://localhost:9001/solr/default',
+ 'TIMEOUT': 60 * 5,
+ 'INCLUDE_SPELLING': True,
+ 'BATCH_SIZE': 100,
+ },
+ 'autocomplete': {
+ 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
+ 'PATH': '/home/search/whoosh_index',
+ 'STORAGE': 'file',
+ 'POST_LIMIT': 128 * 1024 * 1024,
+ 'INCLUDE_SPELLING': True,
+ 'BATCH_SIZE': 100,
+ },
+ 'slave': {
+ 'ENGINE': 'xapian_backend.XapianEngine',
+ 'PATH': '/home/search/xapian_index',
+ 'INCLUDE_SPELLING': True,
+ 'BATCH_SIZE': 100,
+ },
+ }
+
+You are required to have at least one connection listed within
+``HAYSTACK_CONNECTIONS``; it must be named ``default`` & it must have a valid
+``ENGINE`` within it. The bare minimum looks like::
+
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine'
+ }
+ }
+
+The key for each backend is an identifier you use to describe the backend within
+your app. You should refer to the :ref:`ref-multiple_index` documentation for
+more information on using the new multiple indexes & routing features.
+
+Also note that the ``ENGINE`` setting has changed from a lowercase "short name"
+of the engine to a full path to a new ``Engine`` class within the backend.
+Available options are:
+
+* ``haystack.backends.solr_backend.SolrEngine``
+* ``haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine``
+* ``haystack.backends.whoosh_backend.WhooshEngine``
+* ``haystack.backends.simple_backend.SimpleEngine``
+
+Additionally, the following settings were outright removed & will generate
+an exception if found:
+
+* ``HAYSTACK_SITECONF`` - Remove this setting & the file it pointed to.
+* ``HAYSTACK_ENABLE_REGISTRATIONS``
+* ``HAYSTACK_INCLUDE_SPELLING``
+
+
+Backends
+========
+
+The ``dummy`` backend was outright removed from Haystack, as it served very
+little use after the ``simple`` (pure-ORM-powered) backend was introduced.
+
+If you wrote a custom backend, please refer to the "Custom Backends" section
+below.
+
+
+Indexes
+=======
+
+The other major changes affect the ``SearchIndex`` class. As the concept of
+``haystack.site`` & ``SearchSite`` are gone, you'll need to modify your indexes.
+
+A Haystack 1.X index might've looked like::
+
+ import datetime
+ from haystack.indexes import *
+ from haystack import site
+ from myapp.models import Note
+
+
+ class NoteIndex(SearchIndex):
+ text = CharField(document=True, use_template=True)
+ author = CharField(model_attr='user')
+ pub_date = DateTimeField(model_attr='pub_date')
+
+ def get_queryset(self):
+ """Used when the entire index for model is updated."""
+ return Note.objects.filter(pub_date__lte=datetime.datetime.now())
+
+
+ site.register(Note, NoteIndex)
+
+A converted Haystack 2.X index should look like::
+
+ import datetime
+ from haystack import indexes
+ from myapp.models import Note
+
+
+ class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ author = indexes.CharField(model_attr='user')
+ pub_date = indexes.DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+ def index_queryset(self, using=None):
+ """Used when the entire index for model is updated."""
+ return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now())
+
+Note that the import of ``site`` & the registration statements are gone. Newly
+added is the ``NoteIndex.get_model`` method. This is a **required** method &
+should simply return the ``Model`` class the index is for.
+
+There's also an additional class in the ``class`` definition. The
+``indexes.Indexable`` class is a simple mixin that serves to identify the
+classes Haystack should automatically discover & use. If you have a custom
+base class (say ``QueuedSearchIndex``) that other indexes inherit from, simply
+leave ``indexes.Indexable`` off that declaration & Haystack won't try to
+use it.
+
+Additionally, the name of the ``document=True`` field is now enforced to be
+``text`` across all indexes. If you need it named something else, you should
+set the ``HAYSTACK_DOCUMENT_FIELD`` setting. For example::
+
+ HAYSTACK_DOCUMENT_FIELD = 'pink_polka_dot'
+
+Next, the ``index_queryset`` method supplants the ``get_queryset`` method. The
+latter was present in the Haystack 1.2.X series (with a deprecation warning
+in 1.2.4+) but has been removed in Haystack v2.
+
+Finally, if you were unregistering other indexes before, you should make use of
+the new ``EXCLUDED_INDEXES`` setting available in each backend's settings. It
+should be a list of strings that contain the Python import path to the indexes
+that should not be loaded & used. For example::
+
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.solr_backend.SolrEngine',
+ 'URL': 'http://localhost:9001/solr/default',
+ 'EXCLUDED_INDEXES': [
+ # Imagine that these indexes exist. They don't.
+ 'django.contrib.auth.search_indexes.UserIndex',
+ 'third_party_blog_app.search_indexes.EntryIndex',
+ ]
+ }
+ }
+
+This allows for reliable swapping of the index that handles a model without
+relying on correct import order.
+
+
+Removal of ``RealTimeSearchIndex``
+==================================
+
+Use of the ``haystack.indexes.RealTimeSearchIndex`` is no longer valid. It has
+been removed in favor of ``RealtimeSignalProcessor``. To migrate, first change
+the inheritance of all your ``RealTimeSearchIndex`` subclasses to use
+``SearchIndex`` instead::
+
+ # Old.
+ class MySearchIndex(indexes.RealTimeSearchIndex, indexes.Indexable):
+ # ...
+
+
+ # New.
+ class MySearchIndex(indexes.SearchIndex, indexes.Indexable):
+ # ...
+
+Then update your settings to enable use of the ``RealtimeSignalProcessor``::
+
+ HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
+
+
+Done!
+=====
+
+For most basic uses of Haystack, this is all that is necessary to work with
+Haystack 2.X. You should rebuild your index if needed & test your new setup.
+
+
+Advanced Uses
+=============
+
+Swapping Backend
+----------------
+
+If you were manually swapping the ``SearchQuery`` or ``SearchBackend`` being
+used by ``SearchQuerySet`` in the past, it's now preferable to simply setup
+another connection & use the ``SearchQuerySet.using`` method to select that
+connection instead.
+
+Also, if you were manually instantiating ``SearchBackend`` or ``SearchQuery``,
+it's now preferable to rely on the connection's engine to return the right
+thing. For example::
+
+ from haystack import connections
+ backend = connections['default'].get_backend()
+ query = connections['default'].get_query()
+
+
+Custom Backends
+---------------
+
+If you had written a custom ``SearchBackend`` and/or custom ``SearchQuery``,
+there's a little more work needed to be Haystack 2.X compatible.
+
+You should, but don't have to, rename your ``SearchBackend`` & ``SearchQuery``
+classes to be more descriptive/less collide-y. For example,
+``solr_backend.SearchBackend`` became ``solr_backend.SolrSearchBackend``. This
+prevents non-namespaced imports from stomping on each other.
+
+You need to add a new class to your backend, subclassing ``BaseEngine``. This
+allows specifying what ``backend`` & ``query`` should be used on a connection
+with less duplication/naming trickery. It goes at the bottom of the file (so
+that the classes are defined above it) and should look like::
+
+ from haystack.backends import BaseEngine
+ from haystack.backends.solr_backend import SolrSearchQuery
+
+ # Code then...
+
+ class MyCustomSolrEngine(BaseEngine):
+ # Use our custom backend.
+ backend = MySolrBackend
+ # Use the built-in Solr query.
+ query = SolrSearchQuery
+
+Your ``HAYSTACK_CONNECTIONS['default']['ENGINE']`` should then point to the
+full Python import path to your new ``BaseEngine`` subclass.
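+
+As a sketch, assuming the engine above lives in a hypothetical
+``myapp/search_backends.py``, the setting would read::
+
+    HAYSTACK_CONNECTIONS = {
+        'default': {
+            'ENGINE': 'myapp.search_backends.MyCustomSolrEngine',
+            'URL': 'http://localhost:9001/solr/default',
+        },
+    }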
+
+Finally, you will likely have to adjust the ``SearchBackend.__init__`` &
+``SearchQuery.__init__``, as they have changed significantly. Please refer to
+the commits for those backends.
diff --git a/docs/multiple_index.rst b/docs/multiple_index.rst
new file mode 100644
index 0000000..c51b734
--- /dev/null
+++ b/docs/multiple_index.rst
@@ -0,0 +1,201 @@
+.. _ref-multiple_index:
+
+================
+Multiple Indexes
+================
+
+Much like Django's `multiple database support`_, Haystack has "multiple index"
+support. This allows you to talk to several different engines at the same time.
+It enables things like master-slave setups, multiple language indexing,
+separate indexes for general search & autocomplete, as well as other options.
+
+.. _`multiple database support`: http://docs.djangoproject.com/en/1.3/topics/db/multi-db/
+
+
+Specifying Available Connections
+================================
+
+You can supply as many backends as you like, each with a descriptive name. A
+complete setup that accesses all backends might look like::
+
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.solr_backend.SolrEngine',
+ 'URL': 'http://localhost:9001/solr/default',
+ 'TIMEOUT': 60 * 5,
+ 'INCLUDE_SPELLING': True,
+ 'BATCH_SIZE': 100,
+ 'SILENTLY_FAIL': True,
+ },
+ 'autocomplete': {
+ 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
+ 'PATH': '/home/search/whoosh_index',
+ 'STORAGE': 'file',
+ 'POST_LIMIT': 128 * 1024 * 1024,
+ 'INCLUDE_SPELLING': True,
+ 'BATCH_SIZE': 100,
+ 'SILENTLY_FAIL': True,
+ },
+ 'slave': {
+ 'ENGINE': 'xapian_backend.XapianEngine',
+ 'PATH': '/home/search/xapian_index',
+ 'INCLUDE_SPELLING': True,
+ 'BATCH_SIZE': 100,
+ 'SILENTLY_FAIL': True,
+ },
+ 'db': {
+ 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
+ 'SILENTLY_FAIL': True,
+ }
+ }
+
+You are required to have at least one connection listed within
+``HAYSTACK_CONNECTIONS``; it must be named ``default`` & it must have a valid
+``ENGINE`` within it.
+
+
+Management Commands
+===================
+
+All management commands that manipulate data use **ONLY** one connection at a
+time. By default, they use the ``default`` index but accept a ``--using`` flag
+to specify a different connection. For example::
+
+ ./manage.py rebuild_index --noinput --using=whoosh
+
+
+Automatic Routing
+=================
+
+To make the selection of the correct index easier, Haystack (like Django) has
+the concept of "routers". All provided routers are checked whenever a read or
+write happens, stopping at the first router that knows how to handle it.
+
+Haystack ships with a ``DefaultRouter`` enabled. It looks like::
+
+ class DefaultRouter(BaseRouter):
+ def for_read(self, **hints):
+ return DEFAULT_ALIAS
+
+ def for_write(self, **hints):
+ return DEFAULT_ALIAS
+
+On a read (when a search query is executed), the ``DefaultRouter.for_read``
+method is checked & returns the ``DEFAULT_ALIAS`` (which is ``default``),
+telling whatever requested it that it should perform the query against the
+``default`` connection. The same process is followed for writes.
+
+If the ``for_read`` or ``for_write`` method returns ``None``, that indicates
+that the current router can't handle the data. The next router is then checked.
+
+The ``hints`` passed can be anything that helps the router make a decision. This
+data should always be considered optional & your router should guard against it
+being absent. Currently, ``for_write`` receives an ``index`` option (pointing to
+the ``SearchIndex`` calling it) while ``for_read`` may receive ``models`` (a list
+of ``Model`` classes the ``SearchQuerySet`` may be looking at).
+
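+As a sketch of how those hints might be used (the ``MetricsIndex`` class,
+``Metric`` model & ``metrics`` connection alias here are all hypothetical), a
+router could claim a single model's traffic & defer everything else to the
+next router in the list::
+
+    from haystack import routers
+
+
+    class MetricsRouter(routers.BaseRouter):
+        def for_write(self, **hints):
+            index = hints.get('index')
+
+            # Only claim writes coming from the (hypothetical) MetricsIndex.
+            if index is not None and index.__class__.__name__ == 'MetricsIndex':
+                return 'metrics'
+
+            # Returning None hands off to the next router in HAYSTACK_ROUTERS.
+            return None
+
+        def for_read(self, **hints):
+            models = hints.get('models') or []
+
+            if any(model.__name__ == 'Metric' for model in models):
+                return 'metrics'
+
+            return None
+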
+You may provide as many routers as you like by overriding the
+``HAYSTACK_ROUTERS`` setting. For example::
+
+ HAYSTACK_ROUTERS = ['myapp.routers.MasterRouter', 'myapp.routers.SlaveRouter', 'haystack.routers.DefaultRouter']
+
+
+Master-Slave Example
+--------------------
+
+The ``MasterRouter`` & ``SlaveRouter`` might look like::
+
+ from haystack import routers
+
+
+ class MasterRouter(routers.BaseRouter):
+ def for_write(self, **hints):
+ return 'master'
+
+ def for_read(self, **hints):
+ return None
+
+
+ class SlaveRouter(routers.BaseRouter):
+ def for_write(self, **hints):
+ return None
+
+ def for_read(self, **hints):
+ return 'slave'
+
+The observant might notice that since the methods don't overlap, this could be
+combined into one ``Router`` like so::
+
+ from haystack import routers
+
+
+ class MasterSlaveRouter(routers.BaseRouter):
+ def for_write(self, **hints):
+ return 'master'
+
+ def for_read(self, **hints):
+ return 'slave'
+
+
+Manually Selecting
+==================
+
+There may be times when automatic selection of the correct index is undesirable,
+such as when fixing erroneous data in an index or when you know exactly where
+data should be located.
+
+For this, the ``SearchQuerySet`` class allows for manually selecting the index
+via the ``SearchQuerySet.using`` method::
+
+ from haystack.query import SearchQuerySet
+
+ # Uses the routers' opinion.
+ sqs = SearchQuerySet().auto_query('banana')
+
+ # Forces the default.
+ sqs = SearchQuerySet().using('default').auto_query('banana')
+
+ # Forces the slave connection (presuming it was setup).
+ sqs = SearchQuerySet().using('slave').auto_query('banana')
+
+.. warning::
+
+ Note that the models a ``SearchQuerySet`` is trying to pull from must all come
+ from the same index. Haystack is not able to combine search queries against
+ different indexes.
+
+
+Custom Index Selection
+======================
+
+If a specific backend has been selected, the ``SearchIndex.index_queryset`` and
+``SearchIndex.read_queryset`` will receive the backend name, giving indexes the
+opportunity to customize the returned queryset.
+
+For example, a site which uses separate indexes for recent items and older
+content might define ``index_queryset`` to filter the items based on date::
+
+ def index_queryset(self, using=None):
+ qs = Note.objects.all()
+ archive_limit = datetime.datetime.now() - datetime.timedelta(days=90)
+
+ if using == "archive":
+ return qs.filter(pub_date__lte=archive_limit)
+ else:
+ return qs.filter(pub_date__gte=archive_limit)
+
+
+Multi-lingual Content
+---------------------
+
+Most search engines require you to set the language at the index level. For
+example, a multi-lingual site using Solr can use `multiple cores <http://wiki.apache.org/solr/CoreAdmin>`_ and corresponding Haystack
+backends using the language name. Under this scenario, queries are simple::
+
+    sqs = SearchQuerySet().using(lang).auto_query(…)
+
+During index updates, the Index's ``index_queryset`` method will need to filter
+the items to avoid sending the wrong content to the search engine::
+
+ def index_queryset(self, using=None):
+ return Post.objects.filter(language=using)
diff --git a/docs/other_apps.rst b/docs/other_apps.rst
new file mode 100644
index 0000000..e9751ff
--- /dev/null
+++ b/docs/other_apps.rst
@@ -0,0 +1,98 @@
+.. _ref-other_apps:
+
+=============================
+Haystack-Related Applications
+=============================
+
+Sub Apps
+========
+
+These are apps that build on top of the infrastructure provided by Haystack.
+Useful for essentially extending what Haystack can do.
+
+queued_search
+-------------
+
+http://github.com/toastdriven/queued_search (2.X compatible)
+
+Provides a queue-based setup as an alternative to ``RealtimeSignalProcessor`` or
+constantly running the ``update_index`` command. Useful for high-load, short
+update time situations.
+
+celery-haystack
+---------------
+
+https://github.com/jezdez/celery-haystack (1.X and 2.X compatible)
+
+Also provides a queue-based setup, this time centered around Celery. Useful
+for keeping the index fresh per model instance, or for calling the
+``update_index`` management command via the included task.
+
+haystack-rqueue
+---------------
+
+https://github.com/mandx/haystack-rqueue (2.X compatible)
+
+Also provides a queue-based setup, this time centered around RQ. Useful
+for keeping the index fresh using ``./manage.py rqworker``.
+
+django-celery-haystack
+----------------------
+
+https://github.com/mixcloud/django-celery-haystack-SearchIndex
+
+Another queue-based setup, also around Celery. Useful
+for keeping the index fresh.
+
+saved_searches
+--------------
+
+http://github.com/toastdriven/saved_searches (2.X compatible)
+
+Adds personalization to search. Retains a history of queries run by the various
+users on the site (including anonymous users). This can be used to present the
+user with their search history and provide most popular/most recent queries
+on the site.
+
+saved-search
+------------
+
+https://github.com/DirectEmployers/saved-search
+
+An alternate take on persisting user searches, this has a stronger focus
+on locale-based searches as well as further integration.
+
+haystack-static-pages
+---------------------
+
+http://github.com/trapeze/haystack-static-pages
+
+Provides a simple way to index flat (non-model-based) content on your site.
+By using the management command that comes with it, it can crawl all pertinent
+pages on your site and add them to search.
+
+django-tumbleweed
+-----------------
+
+http://github.com/mcroydon/django-tumbleweed
+
+Provides a tumblelog-like view to any/all Haystack-enabled models on your
+site. Useful for presenting date-based views of search data. Attempts to avoid
+the database completely where possible.
+
+
+Haystack-Enabled Apps
+=====================
+
+These are reusable apps that ship with ``SearchIndexes``, suitable for quick
+integration with Haystack.
+
+* django-faq (freq. asked questions app) - http://github.com/benspaulding/django-faq
+* django-essays (blog-like essay app) - http://github.com/bkeating/django-essays
+* gtalug (variety of apps) - http://github.com/myles/gtalug
+* sciencemuseum (science museum open data) - http://github.com/simonw/sciencemuseum
+* vz-wiki (wiki) - http://github.com/jobscry/vz-wiki
+* ffmff (events app) - http://github.com/stefreak/ffmff
+* Dinette (forums app) - http://github.com/uswaretech/Dinette
+* fiftystates_site (site) - http://github.com/sunlightlabs/fiftystates_site
+* Open-Knesset (site) - http://github.com/ofri/Open-Knesset
diff --git a/docs/python3.rst b/docs/python3.rst
new file mode 100644
index 0000000..310ced2
--- /dev/null
+++ b/docs/python3.rst
@@ -0,0 +1,47 @@
+.. _ref-python3:
+
+================
+Python 3 Support
+================
+
+As of Haystack v2.1.0, it has been ported to support both Python 2 & Python 3
+within the same codebase. This builds on top of what `six`_ & `Django`_ provide.
+
+No changes are required for anyone running an existing Haystack
+installation. The API is completely backward-compatible, so you should be able
+to run your existing software without modification.
+
+Virtually all tests pass under both Python 2 & 3, with a small number of
+expected failures under Python 3 (typically related to ordering; see below).
+
+.. _`six`: http://pythonhosted.org/six/
+.. _`Django`: https://docs.djangoproject.com/en/1.5/topics/python3/#str-and-unicode-methods
+
+
+Supported Backends
+==================
+
+The following backends are fully supported under Python 3. However, you may
+need to update these dependencies if you have a pre-existing setup.
+
+* Solr (pysolr>=3.1.0)
+* Elasticsearch
+
+
+Notes
+=====
+
+Testing
+-------
+
+If you were testing things such as the query generated by a given
+``SearchQuerySet`` or how your forms would render, under Python 3.3.2+,
+`hash randomization`_ is in effect, which means that the ordering of
+dictionaries is no longer consistent, even on the same platform.
+
+Haystack's approach is to avoid making assertions about the entire structure.
+Instead, we either assert that the new object contains the right things or wrap
+it in a call to ``sorted(...)`` to ensure a stable order. It is recommended you
+take a similar approach.
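+
+A minimal sketch of that pattern (the data here is made up; only the use of
+``sorted(...)`` matters)::
+
+    import unittest
+
+
+    class OrderingSafeTestCase(unittest.TestCase):
+        def test_generated_filters(self):
+            generated = {'pub_date__gte': '2015-01-01', 'author': 'daniel'}
+
+            # Compare sorted keys rather than relying on the dict's iteration
+            # order, which varies under hash randomization.
+            self.assertEqual(sorted(generated.keys()), ['author', 'pub_date__gte'])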
+
+.. _`hash randomization`: http://docs.python.org/3/whatsnew/3.3.html#builtin-functions-and-types
diff --git a/docs/rich_content_extraction.rst b/docs/rich_content_extraction.rst
new file mode 100644
index 0000000..a23c85d
--- /dev/null
+++ b/docs/rich_content_extraction.rst
@@ -0,0 +1,68 @@
+.. _ref-rich_content_extraction:
+
+=======================
+Rich Content Extraction
+=======================
+
+For some projects it is desirable to index text content which is stored in
+structured files such as PDFs, Microsoft Office documents, images, etc.
+Currently only Solr's `ExtractingRequestHandler`_ is directly supported by
+Haystack but the approach below could be used with any backend which supports
+this feature.
+
+.. _`ExtractingRequestHandler`: http://wiki.apache.org/solr/ExtractingRequestHandler
+
+Extracting Content
+==================
+
+:meth:`SearchBackend.extract_file_contents` accepts a file or file-like object
+and returns a dictionary containing two keys: ``metadata`` and ``contents``. The
+``contents`` value will be a string containing all of the text which the backend
+managed to extract from the file contents. ``metadata`` will always be a
+dictionary but the keys and values will vary based on the underlying extraction
+engine and the type of file provided.
+
+Indexing Extracted Content
+==========================
+
+Generally you will want to include the extracted text in your main document
+field along with everything else specified in your search template. This example
+shows how to override a hypothetical ``FileIndex``'s ``prepare`` method to
+include the extract content along with information retrieved from the database::
+
+ def prepare(self, obj):
+ data = super(FileIndex, self).prepare(obj)
+
+ # This could also be a regular Python open() call, a StringIO instance
+ # or the result of opening a URL. Note that due to a library limitation
+ # file_obj must have a .name attribute even if you need to set one
+ # manually before calling extract_file_contents:
+ file_obj = obj.the_file.open()
+
+ extracted_data = self.backend.extract_file_contents(file_obj)
+
+ # Now we'll finally perform the template processing to render the
+ # text field with *all* of our metadata visible for templating:
+ t = loader.select_template(('search/indexes/myapp/file_text.txt', ))
+ data['text'] = t.render(Context({'object': obj,
+ 'extracted': extracted_data}))
+
+ return data
+
+This allows you to insert the extracted text at the appropriate place in your
+template, modified or intermixed with database content as appropriate:
+
+.. code-block:: html+django
+
+ {{ object.title }}
+ {{ object.owner.name }}
+
+ …
+
+ {% for k, v in extracted.metadata.items %}
+ {% for val in v %}
+ {{ k }}: {{ val|safe }}
+ {% endfor %}
+ {% endfor %}
+
+ {{ extracted.contents|striptags|safe }} \ No newline at end of file
diff --git a/docs/running_tests.rst b/docs/running_tests.rst
new file mode 100644
index 0000000..6b928f7
--- /dev/null
+++ b/docs/running_tests.rst
@@ -0,0 +1,70 @@
+.. _ref-running-tests:
+
+=============
+Running Tests
+=============
+
+Everything
+==========
+
+The simplest way to get up and running with Haystack's tests is to run::
+
+ python setup.py test
+
+This installs all of the backend libraries & all dependencies for getting the
+tests going and runs the tests. You will still have to setup search servers
+(for running Solr tests, the spatial Solr tests & the Elasticsearch tests).
+
+
+Cherry-Picked
+=============
+
+If you'd rather not run all the tests, run only the ones for the backends you
+need; tests for backends whose servers are not running will be skipped.
+
+``Haystack`` is maintained with all tests passing at all times, so if you
+receive any errors during testing, please check your setup and file a report if
+the errors persist.
+
+To run just a portion of the tests you can use the script ``run_tests.py`` and
+just specify the files or directories you wish to run, for example::
+
+ cd test_haystack
+ ./run_tests.py whoosh_tests test_loading.py
+
+The ``run_tests.py`` script is just a tiny wrapper around the nose_ library and
+any options you pass to it will be passed on; including ``--help`` to get a
+list of possible options::
+
+ cd test_haystack
+ ./run_tests.py --help
+
+.. _nose: https://nose.readthedocs.org/en/latest/
+
+Configuring Solr
+================
+
+Haystack assumes that you have a Solr server running on port ``9001`` which
+uses the schema and configuration provided in the
+``test_haystack/solr_tests/server/`` directory. For convenience, a script is
+provided which will download, configure and start a test Solr server::
+
+ test_haystack/solr_tests/server/start-solr-test-server.sh
+
+If no server is found, all Solr-related tests will be skipped.
+
+Configuring Elasticsearch
+=========================
+
+The test suite will try to connect to Elasticsearch on port ``9200``. If no
+server is found, all Elasticsearch tests will be skipped. Note that the tests
+are destructive: during the teardown phase they will wipe the cluster clean, so
+make sure you don't run them against an instance with data you wish to keep.
+
+If you want to run the GeoDjango-based spatial tests, you may need to review the
+`GeoDjango GEOS and GDAL settings`_ before running these commands::
+
+ cd test_haystack
+ ./run_tests.py elasticsearch_tests
+
+.. _GeoDjango GEOS and GDAL settings: https://docs.djangoproject.com/en/1.7/ref/contrib/gis/install/geolibs/#geos-library-path
diff --git a/docs/searchbackend_api.rst b/docs/searchbackend_api.rst
new file mode 100644
index 0000000..d077fbf
--- /dev/null
+++ b/docs/searchbackend_api.rst
@@ -0,0 +1,124 @@
+.. _ref-searchbackend-api:
+
+=====================
+``SearchBackend`` API
+=====================
+
+.. class:: SearchBackend(connection_alias, **connection_options)
+
+The ``SearchBackend`` class handles interaction directly with the backend. The
+search query it performs is usually fed to it from a ``SearchQuery`` class that
+has been built for that backend.
+
+This class must be at least partially implemented on a per-backend basis and
+is usually accompanied by a ``SearchQuery`` class within the same module.
+
+Unless you are writing a new backend, it is unlikely you need to directly
+access this class.
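+
+If you do need a backend instance, fetching it through the connection (as also
+shown in the migration guide) looks like::
+
+    from haystack import connections
+
+    backend = connections['default'].get_backend()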
+
+
+Method Reference
+================
+
+``update``
+----------
+
+.. method:: SearchBackend.update(self, index, iterable)
+
+Updates the backend when given a ``SearchIndex`` and a collection of
+documents.
+
+This method MUST be implemented by each backend, as it will be highly
+specific to each one.
+
+``remove``
+----------
+
+.. method:: SearchBackend.remove(self, obj_or_string)
+
+Removes a document/object from the backend. Can be either a model
+instance or the identifier (i.e. ``app_name.model_name.id``) in the
+event the object no longer exists.
+
+This method MUST be implemented by each backend, as it will be highly
+specific to each one.
+
+``clear``
+---------
+
+.. method:: SearchBackend.clear(self, models=[])
+
+Clears the backend of all documents/objects for a collection of models.
+
+This method MUST be implemented by each backend, as it will be highly
+specific to each one.
+
+``search``
+----------
+
+.. method:: SearchBackend.search(self, query_string, sort_by=None, start_offset=0, end_offset=None, fields='', highlight=False, facets=None, date_facets=None, query_facets=None, narrow_queries=None, spelling_query=None, limit_to_registered_models=None, result_class=None, **kwargs)
+
+Takes a query to search on and returns a dictionary.
+
+The query should be a string that is appropriate syntax for the backend.
+
+The returned dictionary should contain the keys 'results' and 'hits'.
+The 'results' value should be an iterable of populated ``SearchResult``
+objects. The 'hits' should be an integer count of the number of matched
+results the search backend found.
+
+This method MUST be implemented by each backend, as it will be highly
+specific to each one.
+
+``extract_file_contents``
+-------------------------
+
+.. method:: SearchBackend.extract_file_contents(self, file_obj)
+
+Perform text extraction on the provided file or file-like object. Returns either
+None or a dictionary containing the keys ``contents`` and ``metadata``. The
+``contents`` field will always contain the extracted text content returned by
+the underlying search engine but ``metadata`` may vary considerably based on
+the backend and the input file.
+
+``prep_value``
+--------------
+
+.. method:: SearchBackend.prep_value(self, value)
+
+Hook to give the backend a chance to prep an attribute value before
+sending it to the search engine.
+
+By default, it simply coerces the value to unicode.
+
+``more_like_this``
+------------------
+
+.. method:: SearchBackend.more_like_this(self, model_instance, additional_query_string=None, result_class=None)
+
+Takes a model object and returns results the backend thinks are similar.
+
+This method MUST be implemented by each backend, as it will be highly
+specific to each one.
+
+``build_schema``
+----------------
+
+.. method:: SearchBackend.build_schema(self, fields)
+
+Takes a dictionary of fields and returns schema information.
+
+This method MUST be implemented by each backend, as it will be highly
+specific to each one.
+
+``build_models_list``
+---------------------
+
+.. method:: SearchBackend.build_models_list(self)
+
+Builds a list of models for searching.
+
+The ``search`` method should use this and the ``django_ct`` field to
+narrow the results (unless the user indicates not to). This helps ignore
+any results that are not currently handled models and ensures
+consistent caching.
diff --git a/docs/searchfield_api.rst b/docs/searchfield_api.rst
new file mode 100644
index 0000000..bf8466b
--- /dev/null
+++ b/docs/searchfield_api.rst
@@ -0,0 +1,262 @@
+.. _ref-searchfield-api:
+
+===================
+``SearchField`` API
+===================
+
+.. class:: SearchField
+
+The ``SearchField`` and its subclasses provide a way to declare what data
+you're interested in indexing. They are used with ``SearchIndexes``, much like
+``forms.*Field`` are used within forms or ``models.*Field`` within models.
+
+They provide both the means for storing data in the index, as well as preparing
+the data before it's placed in the index. Haystack uses all fields from all
+``SearchIndex`` classes to determine what the engine's index schema ought to
+look like.
+
+In practice, you'll likely never actually use the base ``SearchField``, as the
+subclasses are much better at handling real data.
+
+
+Subclasses
+==========
+
+Included with Haystack are the following field types:
+
+* ``BooleanField``
+* ``CharField``
+* ``DateField``
+* ``DateTimeField``
+* ``DecimalField``
+* ``EdgeNgramField``
+* ``FloatField``
+* ``IntegerField``
+* ``LocationField``
+* ``MultiValueField``
+* ``NgramField``
+
+And equivalent faceted versions:
+
+* ``FacetBooleanField``
+* ``FacetCharField``
+* ``FacetDateField``
+* ``FacetDateTimeField``
+* ``FacetDecimalField``
+* ``FacetFloatField``
+* ``FacetIntegerField``
+* ``FacetMultiValueField``
+
+.. note::
+
+ There is no faceted variant of the n-gram fields. Because of how the engine
+ generates n-grams, faceting on these field types (``NgramField`` &
+   ``EdgeNgramField``) would make very little sense.
+
+
+Usage
+=====
+
+While ``SearchField`` objects can be used on their own, they're generally used
+within a ``SearchIndex``. You use them in a declarative manner, just like
+fields in ``django.forms.Form`` or ``django.db.models.Model`` objects. For
+example::
+
+ from haystack import indexes
+ from myapp.models import Note
+
+
+ class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ author = indexes.CharField(model_attr='user')
+ pub_date = indexes.DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+This will hook up those fields with the index and, when updating a ``Model``
+object, pull the relevant data out and prepare it for storage in the index.
+
+
+Field Options
+=============
+
+``default``
+-----------
+
+.. attribute:: SearchField.default
+
+Provides a means for specifying a fallback value in the event that no data is
+found for the field. Can be either a value or a callable.
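+
+A short sketch of both forms, using the hypothetical ``Note`` model from the
+usage example above (the field names are only illustrative)::
+
+    import datetime
+
+    from haystack import indexes
+    from myapp.models import Note
+
+
+    class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+        text = indexes.CharField(document=True, use_template=True)
+        # A plain fallback value...
+        author = indexes.CharField(model_attr='user', default='unknown')
+        # ...or a callable, which is invoked to produce the fallback value.
+        crawled_on = indexes.DateTimeField(default=datetime.datetime.now)
+
+        def get_model(self):
+            return Note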
+
+``document``
+------------
+
+.. attribute:: SearchField.document
+
+A boolean flag that indicates which of the fields in the ``SearchIndex`` ought
+to be the primary field for searching within. Default is ``False``.
+
+.. note::
+
+ Only one field can be marked as the ``document=True`` field, so you should
+ standardize this name and the format of the field between all of your
+ ``SearchIndex`` classes.
+
+``indexed``
+-----------
+
+.. attribute:: SearchField.indexed
+
+A boolean flag for indicating whether or not the data from this field will
+be searchable within the index. Default is ``True``.
+
+The companion of this option is ``stored``.
+
+``index_fieldname``
+-------------------
+
+.. attribute:: SearchField.index_fieldname
+
+The ``index_fieldname`` option allows you to force the name of the field in the
+index. This does not change how Haystack refers to the field. This is useful
+when using Solr's dynamic attributes or when integrating with other external
+software.
+
+The default is the variable name of the field within the ``SearchIndex``.
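+
+Example (a sketch; the target field name depends on your engine's schema)::
+
+    # Haystack code still refers to this field as ``author``, but the engine
+    # stores it under ``author_exact``.
+    author = indexes.CharField(model_attr='user', index_fieldname='author_exact')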
+
+``model_attr``
+--------------
+
+.. attribute:: SearchField.model_attr
+
+The ``model_attr`` option is a shortcut for preparing data. Rather than having
+to manually fetch data out of a ``Model``, ``model_attr`` allows you to specify
+a string that will automatically pull data out for you. For example::
+
+ # Automatically looks within the model and populates the field with
+ # the ``last_name`` attribute.
+ author = CharField(model_attr='last_name')
+
+It also handles callables::
+
+ # On a ``User`` object, pulls the full name as pieced together by the
+ # ``get_full_name`` method.
+ author = CharField(model_attr='get_full_name')
+
+And can look through relations::
+
+ # Pulls the ``bio`` field from a ``UserProfile`` object that has a
+ # ``OneToOneField`` relationship to a ``User`` object.
+ biography = CharField(model_attr='user__profile__bio')
+
+``null``
+--------
+
+.. attribute:: SearchField.null
+
+A boolean flag for indicating whether or not it's permissible for the field
+not to contain any data. Default is ``False``.
+
+.. note::
+
+ Unlike Django's database layer, which injects a ``NULL`` into the database
+ when a field is marked nullable, ``null=True`` will actually exclude that
+ field from being included with the document. This is more efficient for the
+ search engine to deal with.
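+
+Example (a sketch, assuming the model's ``deleted_at`` may be empty)::
+
+    # Omitted from the document entirely when there's no value.
+    deleted_at = indexes.DateTimeField(model_attr='deleted_at', null=True)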
+
+``stored``
+----------
+
+.. attribute:: SearchField.stored
+
+A boolean flag for indicating whether or not the data from this field will
+be stored within the index. Default is ``True``.
+
+This is useful for pulling data out of the index along with the search result
+in order to save on hits to the database.
+
+The companion of this option is ``indexed``.
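+
+Example (a sketch of the common pre-rendered-result pattern)::
+
+    # Kept in the index and returned with results, but not searchable.
+    rendered = indexes.CharField(use_template=True, indexed=False)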
+
+``template_name``
+-----------------
+
+.. attribute:: SearchField.template_name
+
+Allows you to override the name of the template to use when preparing data. By
+default, the data templates for fields are located within your ``TEMPLATE_DIRS``
+under a path like ``search/indexes/{app_label}/{model_name}_{field_name}.txt``.
+This option lets you override that path (though still within ``TEMPLATE_DIRS``).
+
+Example::
+
+ bio = CharField(use_template=True, template_name='myapp/data/bio.txt')
+
+You can also provide a list of templates, as ``loader.select_template`` is used
+under the hood.
+
+Example::
+
+ bio = CharField(use_template=True, template_name=['myapp/data/bio.txt', 'myapp/bio.txt', 'bio.txt'])
+
+
+``use_template``
+----------------
+
+.. attribute:: SearchField.use_template
+
+A boolean flag for indicating whether or not a field should prepare its data
+via a data template. Default is ``False``.
+
+Data templates are extremely useful, as they let you easily tie together
+different parts of the ``Model`` (and potentially related models). This leads
+to better search results with very little effort.
+
+
+
+Method Reference
+================
+
+``__init__``
+------------
+
+.. method:: SearchField.__init__(self, model_attr=None, use_template=False, template_name=None, document=False, indexed=True, stored=True, faceted=False, default=NOT_PROVIDED, null=False, index_fieldname=None, facet_class=None, boost=1.0, weight=None)
+
+Instantiates a fresh ``SearchField`` instance.
+
+``has_default``
+---------------
+
+.. method:: SearchField.has_default(self)
+
+Returns a boolean of whether this field has a default value.
+
+``prepare``
+-----------
+
+.. method:: SearchField.prepare(self, obj)
+
+Takes data from the provided object and prepares it for storage in the
+index.
+
+``prepare_template``
+--------------------
+
+.. method:: SearchField.prepare_template(self, obj)
+
+Flattens an object for indexing.
+
+This loads a template
+(``search/indexes/{app_label}/{model_name}_{field_name}.txt``) and
+returns the result of rendering that template. ``object`` will be in
+its context.
+
+``convert``
+-----------
+
+.. method:: SearchField.convert(self, value)
+
+Handles conversion between the data found and the type of the field.
+
+Extending classes should override this method and provide correct
+data coercion.
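+
+Example (a hypothetical subclass, not shipped with Haystack)::
+
+    from haystack import indexes
+
+    class UppercaseCharField(indexes.CharField):
+        def convert(self, value):
+            # Coerce whatever the backend returned into an uppercased string.
+            if value is None:
+                return None
+            return str(value).upper()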
diff --git a/docs/searchindex_api.rst b/docs/searchindex_api.rst
new file mode 100644
index 0000000..8263f80
--- /dev/null
+++ b/docs/searchindex_api.rst
@@ -0,0 +1,618 @@
+.. _ref-searchindex-api:
+
+===================
+``SearchIndex`` API
+===================
+
+.. class:: SearchIndex()
+
+The ``SearchIndex`` class allows the application developer a way to provide data to
+the backend in a structured format. Developers familiar with Django's ``Form``
+or ``Model`` classes should find the syntax for indexes familiar.
+
+This class is arguably the most important part of integrating Haystack into your
+application, as it has a large impact on the quality of the search results and
+how easy it is for users to find what they're looking for. Care and effort
+should be put into making your indexes the best they can be.
+
+
+Quick Start
+===========
+
+For the impatient::
+
+ import datetime
+ from haystack import indexes
+ from myapp.models import Note
+
+
+ class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ author = indexes.CharField(model_attr='user')
+ pub_date = indexes.DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+ def index_queryset(self, using=None):
+ "Used when the entire index for model is updated."
+ return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now())
+
+
+Background
+==========
+
+Unlike relational databases, most search engines supported by Haystack are
+primarily document-based. They focus on a single text blob which they tokenize,
+analyze and index. When searching, this field is usually the primary one that
+is searched.
+
+Further, the schema used by most engines is the same for all types of data
+added, unlike a relational database that has a table schema for each chunk of
+data.
+
+It may be helpful to think of your search index as something closer to a
+key-value store instead of imagining it in terms of a RDBMS.
+
+
+Why Create Fields?
+------------------
+
+Despite being primarily document-driven, most search engines also support the
+ability to associate other relevant data with the indexed document. These
+attributes can be mapped through the use of fields within Haystack.
+
+Common uses include storing pertinent metadata, categorizations of the
+document, author information and related data. By adding fields for these pieces
+of data, you provide a means to further narrow/filter search terms. This can
+be useful from either a UI perspective (a better advanced search form) or from a
+developer standpoint (section-dependent search, off-loading certain tasks to
+search, et cetera).
+
+.. warning::
+
+ Haystack reserves the following field names for internal use: ``id``,
+ ``django_ct``, ``django_id`` & ``content``. The ``name`` & ``type`` names
+ used to be reserved but no longer are.
+
+ You can override these field names using the ``HAYSTACK_ID_FIELD``,
+   ``HAYSTACK_DJANGO_CT_FIELD`` & ``HAYSTACK_DJANGO_ID_FIELD`` settings if needed.
+
+
+Significance Of ``document=True``
+---------------------------------
+
+Most search engines that were candidates for inclusion in Haystack have a
+central concept of a document that they index. These documents form a corpus
+within which to primarily search. Because this ideal is so central and most of
+Haystack is designed to have pluggable backends, it is important to ensure that
+all engines have at least a bare minimum of the data they need to function.
+
+As a result, when creating a ``SearchIndex``, at least one field must be marked
+with ``document=True``. This signifies to Haystack that whatever is placed in
+this field while indexing is to be the primary text the search engine indexes.
+The name of this field can be almost anything, but ``text`` is one of the
+more common names used.
+
+
+Stored/Indexed Fields
+---------------------
+
+One shortcoming of the use of search is that you rarely have all or the most
+up-to-date information about an object in the index. As a result, when
+retrieving search results, you will likely have to access the object in the
+database to provide better information.
+
+However, this can also hit the database quite heavily (think
+``.get(pk=result.id)`` per object). If your search is popular, this can lead
+to a big performance hit. There are two ways to prevent this. The first way is
+``SearchQuerySet.load_all``, which tries to group all similar objects and pull
+them through one query instead of many. This still hits the DB and incurs a
+performance penalty.
+
+The other option is to leverage stored fields. By default, all fields in
+Haystack are both indexed (searchable by the engine) and stored (retained by
+the engine and presented in the results). By using a stored field, you can
+store commonly used data in such a way that you don't need to hit the database
+when processing the search result to get more information.
+
+For example, one great way to leverage this is to pre-render an object's
+search result template DURING indexing. You define an additional field, render
+a template with it and it follows the main indexed record into the index. Then,
+when that record is pulled because it matches a query, you can simply display
+the contents of that field, which avoids the database hit entirely.
+
+Within ``myapp/search_indexes.py``::
+
+ class NoteIndex(SearchIndex, indexes.Indexable):
+ text = CharField(document=True, use_template=True)
+ author = CharField(model_attr='user')
+ pub_date = DateTimeField(model_attr='pub_date')
+ # Define the additional field.
+ rendered = CharField(use_template=True, indexed=False)
+
+Then, inside a template named ``search/indexes/myapp/note_rendered.txt``::
+
+ <h2>{{ object.title }}</h2>
+
+ <p>{{ object.content }}</p>
+
+And finally, in ``search/search.html``::
+
+ ...
+
+ {% for result in page.object_list %}
+ <div class="search_result">
+ {{ result.rendered|safe }}
+ </div>
+ {% endfor %}
+
+
+Keeping The Index Fresh
+=======================
+
+There are several approaches to keeping the search index in sync with your
+database. None is more correct than the others; the right choice depends on the
+traffic you see, the churn rate of your data and which concerns are most
+important to you (CPU load, how fresh the results need to be, et cetera).
+
+The conventional method is to use ``SearchIndex`` in combination with cron
+jobs. Running a ``./manage.py update_index`` every couple hours will keep your
+data in sync within that timeframe and will handle the updates in a very
+efficient batch. Additionally, Whoosh (and to a lesser extent Xapian) behaves
+better when using this approach.
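+
+For example, a cron entry along these lines (a sketch; the paths depend on your
+deployment) keeps the index no more than three hours stale::
+
+    # Run update_index every three hours, only touching recently changed records.
+    0 */3 * * * /path/to/env/bin/python /path/to/project/manage.py update_index --age=3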
+
+Another option is to use ``RealtimeSignalProcessor``, which uses Django's
+signals to immediately update the index any time a model is saved/deleted. This
+yields a much more current search index at the expense of being fairly
+inefficient. Solr & Elasticsearch are the only backends that handle this well
+under load, and even then, you should make sure you have the server capacity
+to spare.
+
+A third option is to develop a custom ``QueuedSignalProcessor`` that, much like
+``RealtimeSignalProcessor``, uses Django's signals to enqueue messages for
+updates/deletes, then write a management command that consumes those messages
+in batches. This yields a nice compromise between the previous two options.
+
+For more information see :doc:`signal_processors`.
+
+.. note::
+
+ Haystack doesn't ship with a ``QueuedSignalProcessor`` largely because there is
+   such a diversity of lightweight queuing options and they tend to
+ polarize developers. Queuing is outside of Haystack's goals (provide good,
+ powerful search) and, as such, is left to the developer.
+
+ Additionally, the implementation is relatively trivial & there are already
+ good third-party add-ons for Haystack to enable this.
+
+
+Advanced Data Preparation
+=========================
+
+In most cases, using the ``model_attr`` parameter on your fields allows you to
+easily get data from a Django model to the document in your index, as it handles
+both direct attribute access as well as callable functions within your model.
+
+.. note::
+
+ The ``model_attr`` keyword argument also can look through relations in
+ models. So you can do something like ``model_attr='author__first_name'``
+ to pull just the first name of the author, similar to some lookups used
+ by Django's ORM.
+
+However, sometimes, even more control over what gets placed in your index is
+needed. To facilitate this, ``SearchIndex`` objects have a 'preparation' stage
+that populates data just before it is indexed. You can hook into this phase in
+several ways.
+
+This should be very familiar to developers who have used Django's ``forms``
+before as it loosely follows similar concepts, though the emphasis here is
+less on cleansing data from user input and more on making the data friendly
+to the search backend.
+
+1. ``prepare_FOO(self, object)``
+--------------------------------
+
+The most common way to affect a single field's data is to create a
+``prepare_FOO`` method (where FOO is the name of the field). As a parameter
+to this method, you will receive the instance that is attempting to be indexed.
+
+.. note::
+
+ This method is analogous to Django's ``Form.clean_FOO`` methods.
+
+To keep with our existing example, one use case might be altering the name
+inside the ``author`` field to be "firstname lastname <email>". In this case,
+you might write the following code::
+
+ class NoteIndex(SearchIndex, indexes.Indexable):
+ text = CharField(document=True, use_template=True)
+ author = CharField(model_attr='user')
+ pub_date = DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+ def prepare_author(self, obj):
+ return "%s <%s>" % (obj.user.get_full_name(), obj.user.email)
+
+This method should return a single value (or list/tuple/dict) to populate that
+field's data upon indexing. Note that this method takes priority over whatever
+data may come from the field itself.
+
+Just like ``Form.clean_FOO``, the field's ``prepare`` runs before the
+``prepare_FOO``, allowing you to access ``self.prepared_data``. For example::
+
+ class NoteIndex(SearchIndex, indexes.Indexable):
+ text = CharField(document=True, use_template=True)
+ author = CharField(model_attr='user')
+ pub_date = DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+ def prepare_author(self, obj):
+ # Say we want last name first, the hard way.
+ author = u''
+
+ if 'author' in self.prepared_data:
+ name_bits = self.prepared_data['author'].split()
+ author = "%s, %s" % (name_bits[-1], ' '.join(name_bits[:-1]))
+
+ return author
+
+This method is fully functional with ``model_attr``, so if there's no convenient
+way to access the data you want, this is an excellent way to prepare it::
+
+ class NoteIndex(SearchIndex, indexes.Indexable):
+ text = CharField(document=True, use_template=True)
+ author = CharField(model_attr='user')
+ categories = MultiValueField()
+ pub_date = DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+ def prepare_categories(self, obj):
+ # Since we're using a M2M relationship with a complex lookup,
+ # we can prepare the list here.
+ return [category.id for category in obj.category_set.active().order_by('-created')]
+
+
+2. ``prepare(self, object)``
+----------------------------
+
+Each ``SearchIndex`` gets a ``prepare`` method, which handles collecting all
+the data. This method should return a dictionary that will be the final data
+used by the search backend.
+
+Overriding this method is useful if you need to collect more than one piece
+of data or need to incorporate additional data that is not well represented
+by a single ``SearchField``. An example might look like::
+
+ class NoteIndex(SearchIndex, indexes.Indexable):
+ text = CharField(document=True, use_template=True)
+ author = CharField(model_attr='user')
+ pub_date = DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+ def prepare(self, object):
+ self.prepared_data = super(NoteIndex, self).prepare(object)
+
+ # Add in tags (assuming there's a M2M relationship to Tag on the model).
+ # Note that this would NOT get picked up by the automatic
+ # schema tools provided by Haystack.
+ self.prepared_data['tags'] = [tag.name for tag in object.tags.all()]
+
+ return self.prepared_data
+
+If you choose to use this method, be careful to call the ``super()`` method
+before altering the data. Without doing so, you may end up with an incomplete
+set of data populating your indexes.
+
+This method has the final say in all data, overriding both what the fields
+provide as well as any ``prepare_FOO`` methods on the class.
+
+.. note::
+
+ This method is roughly analogous to Django's ``Form.full_clean`` and
+ ``Form.clean`` methods. However, unlike these methods, it is not fired
+ as the result of trying to access ``self.prepared_data``. It requires
+ an explicit call.
+
+
+3. Overriding ``prepare(self, object)`` On Individual ``SearchField`` Objects
+-----------------------------------------------------------------------------
+
+The final way to manipulate your data is to implement a custom ``SearchField``
+object and write its ``prepare`` method to populate/alter the data any way you
+choose. For instance, a (naive) user-created ``GeoPointField`` might look
+something like::
+
+ from django.utils import six
+ from haystack import indexes
+
+ class GeoPointField(indexes.CharField):
+ def __init__(self, **kwargs):
+ kwargs['default'] = '0.00-0.00'
+ super(GeoPointField, self).__init__(**kwargs)
+
+ def prepare(self, obj):
+ return six.text_type("%s-%s" % (obj.latitude, obj.longitude))
+
+The ``prepare`` method simply returns the value to be used for that field. It's
+entirely possible to include data that's not directly referenced to the object
+here, depending on your needs.
+
+Note that this is NOT a recommended approach to storing geographic data in a
+search engine (there is no formal suggestion on this as support is usually
+non-existent), merely an example of how to extend existing fields.
+
+.. note::
+
+   This method is analogous to Django's ``Field.clean`` methods.
+
+
+Adding New Fields
+=================
+
+If you have an existing ``SearchIndex`` and you add a new field to it, Haystack
+will add this new data on any updates it sees after that point. However, this
+will not populate the existing data you already have.
+
+In order for the data to be picked up, you will need to run ``./manage.py
+rebuild_index``. This will cause all backends to rebuild the existing data
+already present in the quickest and most efficient way.
+
+.. note::
+
+ With the Solr backend, you'll also have to add to the appropriate
+ ``schema.xml`` for your configuration before running the ``rebuild_index``.
+
+
+Method Reference
+================
+
+``get_model``
+-------------
+
+.. method:: SearchIndex.get_model(self)
+
+Should return the ``Model`` class (not an instance) that the rest of the
+``SearchIndex`` should use.
+
+This method is required & you must override it to return the correct class.
+
+``index_queryset``
+------------------
+
+.. method:: SearchIndex.index_queryset(self, using=None)
+
+Get the default QuerySet to index when doing a full update.
+
+Subclasses can override this method to avoid indexing certain objects.
+
+``read_queryset``
+-----------------
+
+.. method:: SearchIndex.read_queryset(self, using=None)
+
+Get the default QuerySet for read actions.
+
+Subclasses can override this method to work with other managers.
+Useful when working with default managers that filter some objects.
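+
+Example (a sketch, assuming the model has an ``all_objects`` manager that skips
+the default manager's filtering)::
+
+    def read_queryset(self, using=None):
+        return self.get_model().all_objects.all()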
+
+``build_queryset``
+-------------------
+
+.. method:: SearchIndex.build_queryset(self, start_date=None, end_date=None)
+
+Get the default QuerySet to index when doing an index update.
+
+Subclasses can override this method to take into account related
+model modification times.
+
+The default is to use ``SearchIndex.index_queryset`` and filter
+based on ``SearchIndex.get_updated_field``.
+
+``prepare``
+-----------
+
+.. method:: SearchIndex.prepare(self, obj)
+
+Fetches and adds/alters data before indexing.
+
+``get_content_field``
+---------------------
+
+.. method:: SearchIndex.get_content_field(self)
+
+Returns the field that supplies the primary document to be indexed.
+
+``update``
+----------
+
+.. method:: SearchIndex.update(self, using=None)
+
+Updates the entire index.
+
+If ``using`` is provided, it specifies which connection should be
+used. Default relies on the routers to decide which backend should
+be used.
+
+``update_object``
+-----------------
+
+.. method:: SearchIndex.update_object(self, instance, using=None, **kwargs)
+
+Update the index for a single object. Attached to the class's
+post-save hook.
+
+If ``using`` is provided, it specifies which connection should be
+used. Default relies on the routers to decide which backend should
+be used.
+
+``remove_object``
+-----------------
+
+.. method:: SearchIndex.remove_object(self, instance, using=None, **kwargs)
+
+Remove an object from the index. Attached to the class's
+post-delete hook.
+
+If ``using`` is provided, it specifies which connection should be
+used. Default relies on the routers to decide which backend should
+be used.
+
+``clear``
+---------
+
+.. method:: SearchIndex.clear(self, using=None)
+
+Clears the entire index.
+
+If ``using`` is provided, it specifies which connection should be
+used. Default relies on the routers to decide which backend should
+be used.
+
+``reindex``
+-----------
+
+.. method:: SearchIndex.reindex(self, using=None)
+
+Completely clears the index for this model and rebuilds it.
+
+If ``using`` is provided, it specifies which connection should be
+used. Default relies on the routers to decide which backend should
+be used.
+
+``get_updated_field``
+---------------------
+
+.. method:: SearchIndex.get_updated_field(self)
+
+Get the field name that represents the updated date for the model.
+
+If specified, this is used by the reindex command to filter out results
+from the ``QuerySet``, enabling you to reindex only recent records. This
+method should either return None (reindex everything always) or a
+string of the ``Model``'s ``DateField``/``DateTimeField`` name.
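+
+Example (a sketch, assuming the model has an ``updated_at`` ``DateTimeField``)::
+
+    def get_updated_field(self):
+        return 'updated_at'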
+
+``should_update``
+-----------------
+
+.. method:: SearchIndex.should_update(self, instance, **kwargs)
+
+Determine if an object should be updated in the index.
+
+It's useful to override this when an object may save frequently and
+cause excessive reindexing. You should check conditions on the instance
+and return False if it is not to be indexed.
+
+The ``kwargs`` passed along to this method can be the same as the ones passed
+by Django when a Model is saved/deleted, so it's possible to check if the object
+has been created or not. See ``django.db.models.signals.post_save`` for details
+on what is passed.
+
+By default, returns True (always reindex).
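+
+Example (a sketch, assuming the model has an ``is_draft`` flag)::
+
+    def should_update(self, instance, **kwargs):
+        # Skip drafts; they'll be indexed once published and saved again.
+        return not instance.is_draft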
+
+``load_all_queryset``
+---------------------
+
+.. method:: SearchIndex.load_all_queryset(self)
+
+Provides the ability to override how objects get loaded in conjunction
+with ``RelatedSearchQuerySet.load_all``. This is useful for post-processing the
+results from the query, enabling things like adding ``select_related`` or
+filtering certain data.
+
+.. warning::
+
+ Utilizing this functionality can have negative performance implications.
+ Please see the section on ``RelatedSearchQuerySet`` within
+ :doc:`searchqueryset_api` for further information.
+
+By default, returns ``all()`` on the model's default manager.
+
+Example::
+
+ class NoteIndex(SearchIndex, indexes.Indexable):
+ text = CharField(document=True, use_template=True)
+ author = CharField(model_attr='user')
+ pub_date = DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+ def load_all_queryset(self):
+ # Pull all objects related to the Note in search results.
+ return Note.objects.all().select_related()
+
+When searching, the ``RelatedSearchQuerySet`` appends on a call to ``in_bulk``, so be
+sure that the ``QuerySet`` you provide can accommodate this and that the ids
+passed to ``in_bulk`` will map to the model in question.
+
+If you need a specific ``QuerySet`` in one place, you can specify this at the
+``RelatedSearchQuerySet`` level using the ``load_all_queryset`` method. See
+:doc:`searchqueryset_api` for usage.
+
+
+``ModelSearchIndex``
+====================
+
+The ``ModelSearchIndex`` class allows for automatic generation of a
+``SearchIndex`` based on the fields of the model assigned to it.
+
+With the exception of the automated introspection, it is a ``SearchIndex``
+class, so all notes above pertaining to ``SearchIndexes`` apply. As with the
+``ModelForm`` class in Django, it employs an inner class called ``Meta``, which
+should contain a ``model`` attribute. By default all non-relational model
+fields are included as search fields on the index, but fields can be restricted
+by way of a ``fields`` whitelist, or excluded with an ``excludes`` list, to
+prevent certain fields from appearing in the class.
+
+In addition, it adds a ``text`` field that is the ``document=True`` field and
+has the ``use_template=True`` option set, just like the ``BasicSearchIndex``.
+
+.. warning::
+
+ Usage of this class might result in inferior ``SearchIndex`` objects, which
+ can directly affect your search results. Use this to establish basic
+   functionality and move to custom ``SearchIndex`` objects for better control.
+
+At this time, it does not handle related fields.
+
+Quick Start
+-----------
+
+For the impatient::
+
+ import datetime
+ from haystack import indexes
+ from myapp.models import Note
+
+ # All Fields
+ class AllNoteIndex(indexes.ModelSearchIndex, indexes.Indexable):
+ class Meta:
+ model = Note
+
+ # Blacklisted Fields
+ class LimitedNoteIndex(indexes.ModelSearchIndex, indexes.Indexable):
+ class Meta:
+ model = Note
+ excludes = ['user']
+
+ # Whitelisted Fields
+ class NoteIndex(indexes.ModelSearchIndex, indexes.Indexable):
+ class Meta:
+ model = Note
+ fields = ['user', 'pub_date']
+
+ # Note that regular ``SearchIndex`` methods apply.
+ def index_queryset(self, using=None):
+ "Used when the entire index for model is updated."
+ return Note.objects.filter(pub_date__lte=datetime.datetime.now())
+
diff --git a/docs/searchquery_api.rst b/docs/searchquery_api.rst
new file mode 100644
index 0000000..305557e
--- /dev/null
+++ b/docs/searchquery_api.rst
@@ -0,0 +1,336 @@
+.. _ref-searchquery-api:
+
+===================
+``SearchQuery`` API
+===================
+
+.. class:: SearchQuery(using=DEFAULT_ALIAS)
+
+The ``SearchQuery`` class acts as an intermediary between ``SearchQuerySet``'s
+abstraction and ``SearchBackend``'s actual search. Given the metadata provided
+by ``SearchQuerySet``, ``SearchQuery`` builds the actual query and interacts
+with the ``SearchBackend`` on ``SearchQuerySet``'s behalf.
+
+This class must be at least partially implemented on a per-backend basis, as portions
+are highly specific to the backend. It usually is bundled with the accompanying
+``SearchBackend``.
+
+Most people will **NOT** have to use this class directly. ``SearchQuerySet``
+handles all interactions with ``SearchQuery`` objects and provides a nicer
+interface to work with.
+
+Should you need advanced/custom behavior, you can supply your own version of
+``SearchQuery`` that overrides/extends the class in the manner you see fit.
+You can either hook it up in a ``BaseEngine`` subclass or pass it to a
+``SearchQuerySet`` via its ``query`` keyword argument.
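+
+For example (a sketch, assuming ``MySearchQuery`` is your own subclass)::
+
+    from haystack.query import SearchQuerySet
+
+    sqs = SearchQuerySet(query=MySearchQuery())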
+
+
+``SQ`` Objects
+==============
+
+For expressing more complex queries, especially involving AND/OR/NOT in
+different combinations, you should use ``SQ`` objects. Like
+``django.db.models.Q`` objects, ``SQ`` objects can be passed to
+``SearchQuerySet.filter`` and combined with the familiar operators (``&``, ``|`` and
+``~``) to generate complex parts of the query.
+
+.. warning::
+
+ Any data you pass to ``SQ`` objects is passed along **unescaped**. If
+ you don't trust the data you're passing along, you should use
+ the ``clean`` method on your ``SearchQuery`` to sanitize the data.
+
+Example::
+
+ from haystack.query import SQ
+
+ # We want "title: Foo AND (tags:bar OR tags:moof)"
+ sqs = SearchQuerySet().filter(title='Foo').filter(SQ(tags='bar') | SQ(tags='moof'))
+
+ # To clean user-provided data:
+ sqs = SearchQuerySet()
+ clean_query = sqs.query.clean(user_query)
+ sqs = sqs.filter(SQ(title=clean_query) | SQ(tags=clean_query))
+
+Internally, the ``SearchQuery`` object maintains a tree of ``SQ`` objects. Each
+``SQ`` object knows what field it looks up against, what kind of lookup (i.e.
+the ``__`` filters), what value it's looking for, whether it's an AND/OR/NOT and
+tracks any children it may have. The ``SearchQuery.build_query`` method starts
+with the root of the tree, building part of the final query at each node until
+the full final query is ready for the ``SearchBackend``.
+
+
+Backend-Specific Methods
+========================
+
+When implementing a new backend, the following methods will need to be created:
+
+``build_query_fragment``
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.build_query_fragment(self, field, filter_type, value)
+
+Generates a query fragment from a field, filter type and a value.
+
+Must be implemented in backends as this will be highly backend specific.
+
+
+Inheritable Methods
+===================
+
+The following methods have a complete implementation in the base class and
+can largely be used unchanged.
+
+``build_query``
+~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.build_query(self)
+
+Interprets the collected query metadata and builds the final query to
+be sent to the backend.
+
+``build_params``
+~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.build_params(self, spelling_query=None)
+
+Generates a list of params to use when searching.
+
+``clean``
+~~~~~~~~~
+
+.. method:: SearchQuery.clean(self, query_fragment)
+
+Provides a mechanism for sanitizing user input before presenting the
+value to the backend.
+
+A basic (override-able) implementation is provided.
+
+``run``
+~~~~~~~
+
+.. method:: SearchQuery.run(self, spelling_query=None, **kwargs)
+
+Builds and executes the query. Returns a list of search results.
+
+Optionally passes along an alternate query for spelling suggestions.
+
+Optionally passes along more kwargs for controlling the search query.
+
+``run_mlt``
+~~~~~~~~~~~
+
+.. method:: SearchQuery.run_mlt(self, **kwargs)
+
+Executes a "More Like This" query. Returns a list of search results similar
+to the provided document (and, optionally, the query).
+
+``run_raw``
+~~~~~~~~~~~
+
+.. method:: SearchQuery.run_raw(self, **kwargs)
+
+Executes a raw query. Returns a list of search results.
+
+``get_count``
+~~~~~~~~~~~~~
+
+.. method:: SearchQuery.get_count(self)
+
+Returns the number of results the backend found for the query.
+
+If the query has not been run, this will execute the query and store
+the results.
+
+``get_results``
+~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.get_results(self, **kwargs)
+
+Returns the results received from the backend.
+
+If the query has not been run, this will execute the query and store
+the results.
+
+``get_facet_counts``
+~~~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.get_facet_counts(self)
+
+Returns the facet counts received from the backend.
+
+If the query has not been run, this will execute the query and store
+the results.
+
+``boost_fragment``
+~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.boost_fragment(self, boost_word, boost_value)
+
+Generates query fragment for boosting a single word/value pair.
+
+``matching_all_fragment``
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.matching_all_fragment(self)
+
+Generates the query that matches all documents.
+
+``add_filter``
+~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_filter(self, expression, value, use_not=False, use_or=False)
+
+Narrows the search by requiring certain conditions.
+
+``add_order_by``
+~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_order_by(self, field)
+
+Orders the search result by a field.
+
+``clear_order_by``
+~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.clear_order_by(self)
+
+Clears out all ordering that has been already added, reverting the
+query to relevancy.
+
+``add_model``
+~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_model(self, model)
+
+Restricts the query by requiring matches in the given model.
+
+This builds upon previous additions, so you can limit to multiple models
+by chaining this method several times.
+
+``set_limits``
+~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.set_limits(self, low=None, high=None)
+
+Restricts the query by altering either the start, end or both offsets.
+
+``clear_limits``
+~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.clear_limits(self)
+
+Clears any existing limits.
+
+``add_boost``
+~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_boost(self, term, boost_value)
+
+Adds a boosted term and the amount to boost it to the query.
+
+``raw_search``
+~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.raw_search(self, query_string, **kwargs)
+
+Runs a raw query (no parsing) against the backend.
+
+This method causes the ``SearchQuery`` to ignore the standard query-generating
+facilities, running only what was provided instead.
+
+Note that any kwargs passed along will override anything provided
+to the rest of the ``SearchQuerySet``.
+
+``more_like_this``
+~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.more_like_this(self, model_instance)
+
+Allows backends with support for "More Like This" to return results
+similar to the provided instance.
+
+``add_stats_query``
+~~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_stats_query(self, stats_field, stats_facets)
+
+Adds stats and stats_facets queries for the Solr backend.
+
+``add_highlight``
+~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_highlight(self)
+
+Adds highlighting to the search results.
+
+``add_within``
+~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_within(self, field, point_1, point_2)
+
+Adds bounding box parameters to the search query.
+
+``add_dwithin``
+~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_dwithin(self, field, point, distance)
+
+Adds radius-based parameters to the search query.
+
+``add_distance``
+~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_distance(self, field, point)
+
+Denotes that results should include distance measurements from the
+point passed in.
+
+``add_field_facet``
+~~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_field_facet(self, field, **options)
+
+Adds a regular facet on a field.
+
+``add_date_facet``
+~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_date_facet(self, field, start_date, end_date, gap_by, gap_amount)
+
+Adds a date-based facet on a field.
+
+``add_query_facet``
+~~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_query_facet(self, field, query)
+
+Adds a query facet on a field.
+
+``add_narrow_query``
+~~~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.add_narrow_query(self, query)
+
+Narrows a search to a subset of all documents per the query.
+
+Generally used in conjunction with faceting.
+
+``set_result_class``
+~~~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuery.set_result_class(self, klass)
+
+Sets the result class to use for results.
+
+Overrides any previous usages. If ``None`` is provided, Haystack will
+revert back to the default ``SearchResult`` object.
+
+``using``
+~~~~~~~~~
+
+.. method:: SearchQuery.using(self, using=None)
+
+Allows for overriding which connection should be used. This
+disables the use of routers when performing the query.
+
+If ``None`` is provided, it has no effect on what backend is used.
diff --git a/docs/searchqueryset_api.rst b/docs/searchqueryset_api.rst
new file mode 100644
index 0000000..03bb34a
--- /dev/null
+++ b/docs/searchqueryset_api.rst
@@ -0,0 +1,893 @@
+.. _ref-searchqueryset-api:
+
+======================
+``SearchQuerySet`` API
+======================
+
+.. class:: SearchQuerySet(using=None, query=None)
+
+The ``SearchQuerySet`` class is designed to make performing a search and
+iterating over its results easy and consistent. For those familiar with Django's
+ORM ``QuerySet``, much of the ``SearchQuerySet`` API should feel familiar.
+
+
+Why Follow ``QuerySet``?
+========================
+
+A couple reasons to follow (at least in part) the ``QuerySet`` API:
+
+#. Consistency with Django
+#. Most Django programmers have experience with the ORM and can use this
+ knowledge with ``SearchQuerySet``.
+
+And from a high-level perspective, ``QuerySet`` and ``SearchQuerySet`` do very similar
+things: given certain criteria, provide a set of results. Both are powered by
+multiple backends, both are abstractions on top of the way a query is performed.
+
+
+Quick Start
+===========
+
+For the impatient::
+
+ from haystack.query import SearchQuerySet
+ all_results = SearchQuerySet().all()
+ hello_results = SearchQuerySet().filter(content='hello')
+ hello_world_results = SearchQuerySet().filter(content='hello world')
+ unfriendly_results = SearchQuerySet().exclude(content='hello').filter(content='world')
+ recent_results = SearchQuerySet().order_by('-pub_date')[:5]
+
+ # Using the new input types...
+ from haystack.inputs import AutoQuery, Exact, Clean
+ sqs = SearchQuerySet().filter(content=AutoQuery(request.GET['q']), product_type=Exact('ancient book'))
+
+ if request.GET['product_url']:
+ sqs = sqs.filter(product_url=Clean(request.GET['product_url']))
+
+For more on the ``AutoQuery``, ``Exact``, ``Clean`` classes & friends, see the
+:ref:`ref-inputtypes` documentation.
+
+
+``SearchQuerySet``
+==================
+
+By default, ``SearchQuerySet`` provides the documented functionality. You can
+extend it with your own behavior by simply subclassing ``SearchQuerySet``,
+adding what you need, then using your subclass in place of ``SearchQuerySet``.
+
+Most methods in ``SearchQuerySet`` "chain" in a similar fashion to ``QuerySet``.
+Additionally, like ``QuerySet``, ``SearchQuerySet`` is lazy (meaning it evaluates the
+query as late as possible). So the following is valid::
+
+ from haystack.query import SearchQuerySet
+ results = SearchQuerySet().exclude(content='hello').filter(content='world').order_by('-pub_date').boost('title', 0.5)[10:20]
+
+
+The ``content`` Shortcut
+========================
+
+Searching your document fields is a very common activity. To help mitigate
+possible differences in ``SearchField`` names (and to help the backends deal
+with search queries that inspect the main corpus), there is a special field
+called ``content``. You may use this in any place that other field names would
+work (e.g. ``filter``, ``exclude``, etc.) to indicate you simply want to
+search the main documents.
+
+For example::
+
+ from haystack.query import SearchQuerySet
+
+ # This searches whatever fields were marked ``document=True``.
+ results = SearchQuerySet().exclude(content='hello')
+
+This special pseudo-field works best with the ``exact`` lookup and may yield
+strange or unexpected results with the other lookups.
+
+
+``SearchQuerySet`` Methods
+==========================
+
+The primary interface to search in Haystack is through the ``SearchQuerySet``
+object. It provides a clean, programmatic, portable API to the search backend.
+Many aspects are also "chainable", meaning you can call methods one after another, each
+applying its changes to the previous ``SearchQuerySet`` and further narrowing
+the search.
+
+All ``SearchQuerySet`` objects implement a list-like interface, meaning you can
+perform actions like getting the length of the results, accessing a result at an
+offset or even slicing the result list.
+
+
+Methods That Return A ``SearchQuerySet``
+----------------------------------------
+
+``all``
+~~~~~~~
+
+.. method:: SearchQuerySet.all(self):
+
+Returns all results for the query. This is largely a no-op (returns an identical
+copy) but useful for denoting exactly what behavior is going on.
+
+``none``
+~~~~~~~~
+
+.. method:: SearchQuerySet.none(self):
+
+Returns an ``EmptySearchQuerySet`` that behaves like a ``SearchQuerySet`` but
+always yields no results.
+
+``filter``
+~~~~~~~~~~
+
+.. method:: SearchQuerySet.filter(self, **kwargs)
+
+Filters the search by looking for (and including) certain attributes.
+
+The lookup parameters (``**kwargs``) should follow the `Field lookups`_ below.
+If you specify more than one pair, they will be joined in the query according to
+the ``HAYSTACK_DEFAULT_OPERATOR`` setting (defaults to ``AND``).
+
+You can pass it either strings or a variety of :ref:`ref-inputtypes` if you
+need more advanced query behavior.
+
+.. warning::
+
+ Any data you pass to ``filter`` gets auto-escaped. If you need to send
+ non-escaped data, use the ``Raw`` input type (:ref:`ref-inputtypes`).
+
+ Also, if a string with one or more spaces in it is specified as the value, the
+ string will get passed along **AS IS**. This will mean that it will **NOT**
+ be treated as a phrase (like Haystack 1.X's behavior).
+
+ If you want to match a phrase, you should use either the ``__exact`` filter
+ type or the ``Exact`` input type (:ref:`ref-inputtypes`).
+
+Examples::
+
+ sqs = SearchQuerySet().filter(content='foo')
+
+ sqs = SearchQuerySet().filter(content='foo', pub_date__lte=datetime.date(2008, 1, 1))
+
+ # Identical to the previous example.
+ sqs = SearchQuerySet().filter(content='foo').filter(pub_date__lte=datetime.date(2008, 1, 1))
+
+ # To send unescaped data:
+ from haystack.inputs import Raw
+ sqs = SearchQuerySet().filter(title=Raw(trusted_query))
+
+ # To use auto-query behavior on a non-``document=True`` field.
+ from haystack.inputs import AutoQuery
+ sqs = SearchQuerySet().filter(title=AutoQuery(user_query))
+
+
+``exclude``
+~~~~~~~~~~~
+
+.. method:: SearchQuerySet.exclude(self, **kwargs)
+
+Narrows the search by ensuring certain attributes are not included.
+
+.. warning::
+
+ Any data you pass to ``exclude`` gets auto-escaped. If you need to send
+ non-escaped data, use the ``Raw`` input type (:ref:`ref-inputtypes`).
+
+Example::
+
+ sqs = SearchQuerySet().exclude(content='foo')
+
+
+``filter_and``
+~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.filter_and(self, **kwargs)
+
+Narrows the search by looking for (and including) certain attributes. Join
+behavior in the query is forced to be ``AND``. Used primarily by the ``filter``
+method.
+
+``filter_or``
+~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.filter_or(self, **kwargs)
+
+Narrows the search by looking for (and including) certain attributes. Join
+behavior in the query is forced to be ``OR``. Used primarily by the ``filter``
+method.
+
+``order_by``
+~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.order_by(self, *args)
+
+Alters the order in which the results should appear. Arguments should be strings
+that map to the attributes/fields within the index. You may specify multiple
+fields by comma separating them::
+
+ SearchQuerySet().filter(content='foo').order_by('author', 'pub_date')
+
+Default behavior is ascending order. To specify descending order, prepend the
+string with a ``-``::
+
+ SearchQuerySet().filter(content='foo').order_by('-pub_date')
+
+.. note::
+
+ In general, ordering is locale-specific. Haystack makes no effort to try to
+ reconcile differences between characters from different languages. This
+ means that accented characters will sort closely with the same character
+ and **NOT** necessarily close to the unaccented form of the character.
+
+ If you want this kind of behavior, you should override the ``prepare_FOO``
+ methods on your ``SearchIndex`` objects to transliterate the characters
+ as you see fit.
+
+``highlight``
+~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.highlight(self)
+
+If supported by the backend, the ``SearchResult`` objects returned will include
+a highlighted version of the result::
+
+ sqs = SearchQuerySet().filter(content='foo').highlight()
+ result = sqs[0]
+ result.highlighted['text'][0] # u'Two computer scientists walk into a bar. The bartender says "<em>Foo</em>!".'
+
+``models``
+~~~~~~~~~~
+
+.. method:: SearchQuerySet.models(self, *models)
+
+Accepts an arbitrary number of Model classes to include in the search. This will
+narrow the search results to only include results from the models specified.
+
+Example::
+
+ SearchQuerySet().filter(content='foo').models(BlogEntry, Comment)
+
+``result_class``
+~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.result_class(self, klass)
+
+Allows specifying a different class to use for results.
+
+Overrides any previous usages. If ``None`` is provided, Haystack will
+revert back to the default ``SearchResult`` object.
+
+Example::
+
+ SearchQuerySet().result_class(CustomResult)
+
+``boost``
+~~~~~~~~~
+
+.. method:: SearchQuerySet.boost(self, term, boost_value)
+
+Boosts a certain term of the query. You provide the term to be boosted and the
+value is the amount to boost it by. Boost amounts may be either an integer or a
+float.
+
+Example::
+
+ SearchQuerySet().filter(content='foo').boost('bar', 1.5)
+
+``facet``
+~~~~~~~~~
+
+.. method:: SearchQuerySet.facet(self, field, **options)
+
+Adds faceting to a query for the provided field. You provide the field (from one
+of the ``SearchIndex`` classes) you'd like to facet on. Any keyword options you
+provide will be passed along to the backend for that facet.
+
+Example::
+
+ # For SOLR (setting f.author.facet.*; see http://wiki.apache.org/solr/SimpleFacetParameters#Parameters)
+ SearchQuerySet().facet('author', mincount=1, limit=10)
+ # For ElasticSearch (see http://www.elasticsearch.org/guide/reference/api/search/facets/terms-facet.html)
+ SearchQuerySet().facet('author', size=10, order='term')
+
+In the search results you get back, facet counts will be populated in the
+``SearchResult`` object. You can access them via the ``facet_counts`` method.
+
+Example::
+
+ # Count document hits for each author within the index.
+ SearchQuerySet().filter(content='foo').facet('author')
+
+``date_facet``
+~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.date_facet(self, field, start_date, end_date, gap_by, gap_amount=1)
+
+Adds faceting to a query for the provided field by date. You provide the field
+(from one of the ``SearchIndex`` classes) you'd like to facet on, a ``start_date``
+(either ``datetime.datetime`` or ``datetime.date``), an ``end_date`` and the
+amount of time between gaps as ``gap_by`` (one of ``'year'``, ``'month'``,
+``'day'``, ``'hour'``, ``'minute'`` or ``'second'``).
+
+You can also optionally provide a ``gap_amount`` to specify a different
+increment than ``1``. For example, specifying gaps by week (every seven days)
+would be ``gap_by='day', gap_amount=7``.
+
+In the search results you get back, facet counts will be populated in the
+``SearchResult`` object. You can access them via the ``facet_counts`` method.
+
+Example::
+
+ # Count document hits for each day between 2009-06-07 to 2009-07-07 within the index.
+ SearchQuerySet().filter(content='foo').date_facet('pub_date', start_date=datetime.date(2009, 6, 7), end_date=datetime.date(2009, 7, 7), gap_by='day')
+
+``query_facet``
+~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.query_facet(self, field, query)
+
+Adds faceting to a query for the provided field with a custom query. You provide
+the field (from one of the ``SearchIndex`` classes) you'd like to facet on and the
+backend-specific query (as a string) you'd like to execute.
+
+Please note that this is **NOT** portable between backends. The syntax is entirely
+dependent on the backend. No validation/cleansing is performed and it is up to
+the developer to ensure the query's syntax is correct.
+
+In the search results you get back, facet counts will be populated in the
+``SearchResult`` object. You can access them via the ``facet_counts`` method.
+
+Example::
+
+ # Count document hits for authors that start with 'jo' within the index.
+ SearchQuerySet().filter(content='foo').query_facet('author', 'jo*')
+
+``within``
+~~~~~~~~~~
+
+.. method:: SearchQuerySet.within(self, field, point_1, point_2):
+
+Spatial: Adds a bounding box search to the query.
+
+See the :ref:`ref-spatial` docs for more information.
+
+``dwithin``
+~~~~~~~~~~~
+
+.. method:: SearchQuerySet.dwithin(self, field, point, distance):
+
+Spatial: Adds a distance-based search to the query.
+
+See the :ref:`ref-spatial` docs for more information.
+
+``stats``
+~~~~~~~~~
+
+.. method:: SearchQuerySet.stats(self, field):
+
+Adds stats to a query for the provided field. This is supported on
+Solr only. You provide the field (from one of the ``SearchIndex``
+classes) you would like stats on.
+
+In the search results you get back, stats will be populated in the
+``SearchResult`` object. You can access them via the ``stats_results`` method.
+
+Example::
+
+ # Get stats on the author field.
+ SearchQuerySet().filter(content='foo').stats('author')
+
+``stats_facet``
+~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.stats_facet(self, field, facet_fields=None)
+
+Adds a stats facet for the given field, where ``facet_fields`` lists the fields
+to facet the stats by. This is supported on Solr only.
+
+Example::
+
+    # Get stats on the author field, and stats on the author field
+    # faceted by bookstore.
+    SearchQuerySet().filter(content='foo').stats_facet('author', 'bookstore')
+
+
+``distance``
+~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.distance(self, field, point)
+
+Spatial: Denotes results must have distance measurements from the
+provided point.
+
+See the :ref:`ref-spatial` docs for more information.
+
+``narrow``
+~~~~~~~~~~
+
+.. method:: SearchQuerySet.narrow(self, query)
+
+Pulls a subset of documents from the search engine to search within. This is
+for advanced usage, especially useful when faceting.
+
+Example::
+
+ # Search, from recipes containing 'blend', for recipes containing 'banana'.
+ SearchQuerySet().narrow('blend').filter(content='banana')
+
+ # Using a fielded search where the recipe's title contains 'smoothie', find all recipes published before 2009.
+ SearchQuerySet().narrow('title:smoothie').filter(pub_date__lte=datetime.datetime(2009, 1, 1))
+
+By using ``narrow``, you can create drill-down interfaces for faceting by
+applying ``narrow`` calls for each facet that gets selected.
+
+This method is different from ``SearchQuerySet.filter()`` in that it does not
+affect the query sent to the engine. It pre-limits the document set being
+searched. Generally speaking, if you're in doubt of whether to use
+``filter`` or ``narrow``, use ``filter``.
+
+.. note::
+
+ This method is, generally speaking, not necessarily portable between
+ backends. The syntax is entirely dependent on the backend, though most
+ backends have a similar syntax for basic fielded queries. No
+ validation/cleansing is performed and it is up to the developer to ensure
+ the query's syntax is correct.
+
+``raw_search``
+~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.raw_search(self, query_string, **kwargs)
+
+Passes a raw query directly to the backend. This is for advanced usage, where
+the desired query can not be expressed via ``SearchQuerySet``.
+
+This method is still supported; however, it now uses the much more flexible
+``Raw`` input type (:ref:`ref-inputtypes`).
+
+.. warning::
+
+ Different from Haystack 1.X, this method no longer causes immediate
+ evaluation & now chains appropriately.
+
+Example::
+
+ # In the case of Solr... (this example could be expressed with SearchQuerySet)
+ SearchQuerySet().raw_search('django_ct:blog.blogentry "However, it is"')
+
+ # Equivalent.
+ from haystack.inputs import Raw
+ sqs = SearchQuerySet().filter(content=Raw('django_ct:blog.blogentry "However, it is"'))
+
+Please note that this is **NOT** portable between backends. The syntax is entirely
+dependent on the backend. No validation/cleansing is performed and it is up to
+the developer to ensure the query's syntax is correct.
+
+Further, the use of ``**kwargs`` is intentionally undocumented. If
+a third-party backend can implement special features beyond what's present, it
+should use those ``**kwargs`` for passing that information. Developers should
+be careful to make sure there are no conflicts with the backend's ``search``
+method, as that is called directly.
+
+``load_all``
+~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.load_all(self)
+
+Efficiently populates the objects in the search results. Without using this
+method, DB lookups are done on a per-object basis, resulting in many individual
+trips to the database. If ``load_all`` is used, the ``SearchQuerySet`` will
+group similar objects into a single query, resulting in only as many queries as
+there are different object types returned.
+
+Example::
+
+ SearchQuerySet().filter(content='foo').load_all()
+
+``auto_query``
+~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.auto_query(self, query_string, fieldname=None)
+
+Performs a best guess constructing the search query.
+
+This method is intended for common use directly with a user's query. This
+method is still supported; however, it now uses the much more flexible
+``AutoQuery`` input type (:ref:`ref-inputtypes`).
+
+It handles exact matches (specified with single or double quotes), negation
+(using a ``-`` immediately before the term) and joining remaining terms with the
+operator specified in ``HAYSTACK_DEFAULT_OPERATOR``.
+
+Example::
+
+ sqs = SearchQuerySet().auto_query('goldfish "old one eye" -tank')
+
+ # Equivalent.
+ from haystack.inputs import AutoQuery
+ sqs = SearchQuerySet().filter(content=AutoQuery('goldfish "old one eye" -tank'))
+
+ # Against a different field.
+ sqs = SearchQuerySet().filter(title=AutoQuery('goldfish "old one eye" -tank'))
+
+
+``autocomplete``
+~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.autocomplete(self, **kwargs)
+
+A shortcut method to perform an autocomplete search.
+
+Must be run against fields that are either ``NgramField`` or
+``EdgeNgramField``.
+
+Example::
+
+ SearchQuerySet().autocomplete(title_autocomplete='gol')
+
+``more_like_this``
+~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.more_like_this(self, model_instance)
+
+Finds similar results to the object passed in.
+
+You should pass in an instance of a model (for example, one fetched via a
+``get`` in Django's ORM). This will execute a query on the backend that searches
+for similar results. The instance you pass in should be an indexed object.
+Previously called methods will have an effect on the provided results.
+
+It will evaluate its own backend-specific query and populate the
+``SearchQuerySet`` in the same manner as other methods.
+
+Example::
+
+ entry = Entry.objects.get(slug='haystack-one-oh-released')
+ mlt = SearchQuerySet().more_like_this(entry)
+ mlt.count() # 5
+ mlt[0].object.title # "Haystack Beta 1 Released"
+
+ # ...or...
+ mlt = SearchQuerySet().filter(public=True).exclude(pub_date__lte=datetime.date(2009, 7, 21)).more_like_this(entry)
+ mlt.count() # 2
+ mlt[0].object.title # "Haystack Beta 1 Released"
+
+``using``
+~~~~~~~~~
+
+.. method:: SearchQuerySet.using(self, connection_name)
+
+Allows switching which connection the ``SearchQuerySet`` uses to search in.
+
+Example::
+
+ # Let the routers decide which connection to use.
+ sqs = SearchQuerySet().all()
+
+ # Specify the 'default'.
+ sqs = SearchQuerySet().all().using('default')
+
+
+Methods That Do Not Return A ``SearchQuerySet``
+-----------------------------------------------
+
+``count``
+~~~~~~~~~
+
+.. method:: SearchQuerySet.count(self)
+
+Returns the total number of matching results.
+
+This returns an integer count of the total number of results the search backend
+found that matched. This method causes the query to evaluate and run the search.
+
+Example::
+
+ SearchQuerySet().filter(content='foo').count()
+
+``best_match``
+~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.best_match(self)
+
+Returns the best/top search result that matches the query.
+
+This method causes the query to evaluate and run the search. This method returns
+a ``SearchResult`` object that is the best match the search backend found::
+
+ foo = SearchQuerySet().filter(content='foo').best_match()
+ foo.id # Something like 5.
+
+ # Identical to:
+ foo = SearchQuerySet().filter(content='foo')[0]
+
+``latest``
+~~~~~~~~~~
+
+.. method:: SearchQuerySet.latest(self, date_field)
+
+Returns the most recent search result that matches the query.
+
+This method causes the query to evaluate and run the search. This method returns
+a ``SearchResult`` object that is the most recent match the search backend
+found::
+
+ foo = SearchQuerySet().filter(content='foo').latest('pub_date')
+ foo.id # Something like 3.
+
+ # Identical to:
+ foo = SearchQuerySet().filter(content='foo').order_by('-pub_date')[0]
+
+``facet_counts``
+~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.facet_counts(self)
+
+Returns the facet counts found by the query. This will cause the query to
+execute and should generally be used when presenting the data (template-level).
+
+You receive back a dictionary with three keys: ``fields``, ``dates`` and
+``queries``. Each contains the facet counts for whatever facets you specified
+within your ``SearchQuerySet``.
+
+.. note::
+
+ The resulting dictionary may change before 1.0 release. It's fairly
+ backend-specific at the time of writing. Standardizing is waiting on
+ implementing other backends that support faceting and ensuring that the
+ results presented will meet their needs as well.
+
+Example::
+
+ # Count document hits for each author.
+ sqs = SearchQuerySet().filter(content='foo').facet('author')
+
+ sqs.facet_counts()
+ # Gives the following response:
+ # {
+ # 'dates': {},
+ # 'fields': {
+ # 'author': [
+ # ('john', 4),
+ # ('daniel', 2),
+ # ('sally', 1),
+ # ('terry', 1),
+ # ],
+ # },
+ # 'queries': {}
+ # }
+
+``stats_results``
+~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.stats_results(self)
+
+Returns the stats results found by the query.
+
+This will cause the query to execute and should generally be used when
+presenting the data (template-level).
+
+You receive back a dictionary with a ``stats_fields`` key. It contains the
+statistics (count, min, max, sum, mean, etc.) for whatever fields you requested
+stats on within your ``SearchQuerySet``.
+
+.. note::
+
+    The resulting dictionary may change in future releases. It's fairly
+    backend-specific at the time of writing. Standardizing is waiting on
+    implementing other backends that support stats and ensuring that the
+    results presented will meet their needs as well.
+
+Example::
+
+    # Gather statistics on the price field.
+ sqs = SearchQuerySet().filter(content='foo').stats('price')
+
+ sqs.stats_results()
+
+    # Gives the following response:
+    # {
+    #     'stats_fields': {
+    #         'price': {
+    #             'min': 0.0,
+    #             'max': 2199.0,
+    #             'sum': 5251.2699999999995,
+    #             'count': 15,
+    #             'missing': 11,
+    #             'sumOfSquares': 6038619.160300001,
+    #             'mean': 350.08466666666664,
+    #             'stddev': 547.737557906113
+    #         }
+    #     }
+    # }
+
+
+``spelling_suggestion``
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.spelling_suggestion(self, preferred_query=None)
+
+Returns the spelling suggestion found by the query.
+
+To work, you must set ``INCLUDE_SPELLING`` within your connection's
+settings dictionary to ``True``, and you must rebuild your index afterwards.
+Otherwise, ``None`` will be returned.
+
+This method causes the query to evaluate and run the search if it hasn't already
+run. Search results will be populated as normal but with an additional spelling
+suggestion. Note that this does *NOT* run the revised query, only suggests
+improvements.
+
+If provided, the optional argument to this method lets you specify an alternate
+query for the spelling suggestion to be run on. This is useful for passing along
+a raw user-provided query, especially when there are many methods chained on the
+``SearchQuerySet``.
+
+Example::
+
+ sqs = SearchQuerySet().auto_query('mor exmples')
+ sqs.spelling_suggestion() # u'more examples'
+
+ # ...or...
+ suggestion = SearchQuerySet().spelling_suggestion('moar exmples')
+ suggestion # u'more examples'
+
+``values``
+~~~~~~~~~~
+
+.. method:: SearchQuerySet.values(self, *fields)
+
+Returns a list of dictionaries, each containing the key/value pairs for the
+result, exactly like Django's ``ValuesQuerySet``.
+
+This method causes the query to evaluate and run the search if it hasn't already
+run.
+
+You must provide a list of one or more fields as arguments. These fields will
+be the ones included in the individual results.
+
+Example::
+
+ sqs = SearchQuerySet().auto_query('banana').values('title', 'description')
+
+
+``values_list``
+~~~~~~~~~~~~~~~
+
+.. method:: SearchQuerySet.values_list(self, *fields, **kwargs)
+
+Returns a list of field values as tuples, exactly like Django's
+``ValuesListQuerySet``.
+
+This method causes the query to evaluate and run the search if it hasn't already
+run.
+
+You must provide a list of one or more fields as arguments. These fields will
+be the ones included in the individual results.
+
+You may optionally also provide a ``flat=True`` kwarg, which in the case of a
+single field being provided, will return a flat list of that field rather than
+a list of tuples.
+
+Example::
+
+ sqs = SearchQuerySet().auto_query('banana').values_list('title', 'description')
+
+ # ...or just the titles as a flat list...
+ sqs = SearchQuerySet().auto_query('banana').values_list('title', flat=True)
+
+
+.. _field-lookups:
+
+Field Lookups
+-------------
+
+The following lookup types are supported:
+
+* contains
+* exact
+* gt
+* gte
+* lt
+* lte
+* in
+* startswith
+* range
+
+These options are similar in function to the way Django's lookup types work.
+The actual behavior of these lookups is backend-specific.
+
+.. warning::
+
+ The ``startswith`` filter is strongly affected by the other ways the engine
+ parses data, especially in regards to stemming (see :doc:`glossary`). This
+ can mean that if the query ends in a vowel or a plural form, it may get
+ stemmed before being evaluated.
+
+    This behavior is backend-specific yet fairly consistent between engines,
+    and can sometimes cause unexpected results.
+
+.. warning::
+
+ The ``contains`` filter became the new default filter as of Haystack v2.X
+ (the default in Haystack v1.X was ``exact``). This changed because ``exact``
+ caused problems and was unintuitive for new people trying to use Haystack.
+ ``contains`` is a much more natural usage.
+
+ If you had an app built on Haystack v1.X & are upgrading, you'll need to
+ sanity-check & possibly change any code that was relying on the default.
+ The solution is just to add ``__exact`` to any "bare" field in a
+ ``.filter(...)`` clause.
+
+Example::
+
+ SearchQuerySet().filter(content='foo')
+
+ # Identical to:
+ SearchQuerySet().filter(content__contains='foo')
+
+ # Phrase matching.
+ SearchQuerySet().filter(content__exact='hello world')
+
+ # Other usages look like:
+ SearchQuerySet().filter(pub_date__gte=datetime.date(2008, 1, 1), pub_date__lt=datetime.date(2009, 1, 1))
+ SearchQuerySet().filter(author__in=['daniel', 'john', 'jane'])
+ SearchQuerySet().filter(view_count__range=[3, 5])
+
+
+``EmptySearchQuerySet``
+=======================
+
+Also included in Haystack is an ``EmptySearchQuerySet`` class. It behaves just
+like ``SearchQuerySet`` but will always return zero results. This is useful for
+places where you don't want a query to hit the backend but still need a
+result-set-like object to work with.
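+
+A minimal sketch of the typical pattern (an illustrative helper rather than a
+full view; ``q`` is assumed to be the query parameter)::
+
+    from haystack.query import EmptySearchQuerySet, SearchQuerySet
+
+
+    def get_results(request):
+        query = request.GET.get('q', '')
+
+        if query:
+            return SearchQuerySet().auto_query(query)
+
+        # No query was given; hand back an (always empty) result set without
+        # ever hitting the search backend.
+        return EmptySearchQuerySet()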
+
+
+``RelatedSearchQuerySet``
+=========================
+
+Sometimes you need to filter results based on relations in the database that are
+not present in the search index or are difficult to express that way. To this
+end, ``RelatedSearchQuerySet`` allows you to post-process the search results by
+calling ``load_all_queryset``.
+
+.. warning::
+
+ ``RelatedSearchQuerySet`` can have negative performance implications.
+ Because results are excluded based on the database after the search query
+ has been run, you can't guarantee offsets within the cache. Therefore, the
+ entire cache that appears before the offset you request must be filled in
+ order to produce consistent results. On large result sets and at higher
+ slices, this can take time.
+
+ This is the old behavior of ``SearchQuerySet``, so performance is no worse
+ than the early days of Haystack.
+
+It supports all other methods that the standard ``SearchQuerySet`` does, with
+the addition of the ``load_all_queryset`` method and paying attention to the
+``load_all_queryset`` method of ``SearchIndex`` objects when populating the
+cache.
+
+``load_all_queryset``
+---------------------
+
+.. method:: RelatedSearchQuerySet.load_all_queryset(self, model_class, queryset)
+
+Allows for specifying a custom ``QuerySet`` that changes how ``load_all`` will
+fetch records for the provided model. This is useful for post-processing the
+results from the query, enabling things like adding ``select_related`` or
+filtering certain data.
+
+Example::
+
+ sqs = RelatedSearchQuerySet().filter(content='foo').load_all()
+ # For the Entry model, we want to include related models directly associated
+ # with the Entry to save on DB queries.
+ sqs = sqs.load_all_queryset(Entry, Entry.objects.all().select_related(depth=1))
+
+This method chains indefinitely, so you can specify ``QuerySets`` for as many
+models as you wish, one per model. The ``SearchQuerySet`` appends on a call to
+``in_bulk``, so be sure that the ``QuerySet`` you provide can accommodate this
+and that the ids passed to ``in_bulk`` will map to the model in question.
+
+If you need to do this frequently and have one ``QuerySet`` you'd like to apply
+everywhere, you can specify this at the ``SearchIndex`` level using the
+``load_all_queryset`` method. See :doc:`searchindex_api` for usage.
diff --git a/docs/searchresult_api.rst b/docs/searchresult_api.rst
new file mode 100644
index 0000000..ea506f2
--- /dev/null
+++ b/docs/searchresult_api.rst
@@ -0,0 +1,62 @@
+.. _ref-searchresult-api:
+
+====================
+``SearchResult`` API
+====================
+
+.. class:: SearchResult(app_label, model_name, pk, score, **kwargs)
+
+The ``SearchResult`` class provides structure to the results that come back from
+the search index. These objects are what a ``SearchQuerySet`` will return when
+evaluated.
+
+
+Attribute Reference
+===================
+
+The class exposes the following useful attributes/properties (a short usage
+example follows the list):
+
+* ``app_label`` - The application the model is attached to.
+* ``model_name`` - The model's name.
+* ``pk`` - The primary key of the model.
+* ``score`` - The score provided by the search engine.
+* ``object`` - The actual model instance (lazy loaded).
+* ``model`` - The model class.
+* ``verbose_name`` - A prettier version of the model's class name for display.
+* ``verbose_name_plural`` - A prettier version of the model's *plural* class name for display.
+* ``searchindex`` - Returns the ``SearchIndex`` class associated with this
+ result.
+* ``distance`` - On geo-spatial queries, this returns a ``Distance`` object
+ representing the distance the result was from the focused point.
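+
+For example (a minimal sketch; the values shown are purely illustrative)::
+
+    from haystack.query import SearchQuerySet
+
+    result = SearchQuerySet().filter(content='banana')[0]
+
+    result.app_label   # e.g. 'myapp'
+    result.model_name  # e.g. 'note'
+    result.pk          # e.g. '42'
+    result.score       # Backend-specific relevance score.
+    result.object      # Triggers a database lookup on first access.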
+
+
+Method Reference
+================
+
+``content_type``
+----------------
+
+.. method:: SearchResult.content_type(self)
+
+Returns the content type for the result's model instance.
+
+``get_additional_fields``
+-------------------------
+
+.. method:: SearchResult.get_additional_fields(self)
+
+Returns a dictionary of all of the fields from the raw result.
+
+Useful for serializing results. Only returns what was seen from the
+search engine, so it may have extra fields Haystack's indexes aren't
+aware of.
+
+``get_stored_fields``
+---------------------
+
+.. method:: SearchResult.get_stored_fields(self)
+
+Returns a dictionary of all of the stored fields from the SearchIndex.
+
+Useful for serializing results. Only returns the fields Haystack's
+indexes are aware of as being 'stored'.
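+
+For instance, a rough sketch of serializing results for a JSON endpoint
+(assuming the stored field values are JSON-serializable; dates & similar
+values may need converting first)::
+
+    import json
+
+    from haystack.query import SearchQuerySet
+
+    results = SearchQuerySet().filter(content='banana')
+    payload = json.dumps([result.get_stored_fields() for result in results])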
diff --git a/docs/settings.rst b/docs/settings.rst
new file mode 100644
index 0000000..c60752e
--- /dev/null
+++ b/docs/settings.rst
@@ -0,0 +1,289 @@
+.. _ref-settings:
+
+=================
+Haystack Settings
+=================
+
+As a way to extend/change the default behavior within Haystack, there are
+several settings you can alter within your ``settings.py``. This is a
+comprehensive list of the settings Haystack recognizes.
+
+
+``HAYSTACK_DEFAULT_OPERATOR``
+=============================
+
+**Optional**
+
+This setting controls the default behavior for chaining ``SearchQuerySet``
+filters together.
+
+Valid options are::
+
+ HAYSTACK_DEFAULT_OPERATOR = 'AND'
+ HAYSTACK_DEFAULT_OPERATOR = 'OR'
+
+Defaults to ``AND``.
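+
+For instance, with ``AND`` each chained ``filter`` call (and each term of an
+``auto_query``) narrows the results, while ``OR`` broadens them. A rough
+sketch::
+
+    from haystack.query import SearchQuerySet
+
+    sqs = SearchQuerySet().filter(content='hello').filter(content='world')
+
+    # With HAYSTACK_DEFAULT_OPERATOR = 'AND': documents matching both terms.
+    # With HAYSTACK_DEFAULT_OPERATOR = 'OR': documents matching either term.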
+
+
+``HAYSTACK_CONNECTIONS``
+========================
+
+**Required**
+
+This setting controls which backends should be available. It should be a
+dictionary of dictionaries resembling the following (complete) example::
+
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.solr_backend.SolrEngine',
+ 'URL': 'http://localhost:9001/solr/default',
+ 'TIMEOUT': 60 * 5,
+ 'INCLUDE_SPELLING': True,
+ 'BATCH_SIZE': 100,
+ 'EXCLUDED_INDEXES': ['thirdpartyapp.search_indexes.BarIndex'],
+ },
+ 'autocomplete': {
+ 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
+ 'PATH': '/home/search/whoosh_index',
+ 'STORAGE': 'file',
+ 'POST_LIMIT': 128 * 1024 * 1024,
+ 'INCLUDE_SPELLING': True,
+ 'BATCH_SIZE': 100,
+ 'EXCLUDED_INDEXES': ['thirdpartyapp.search_indexes.BarIndex'],
+ },
+ 'slave': {
+ 'ENGINE': 'xapian_backend.XapianEngine',
+ 'PATH': '/home/search/xapian_index',
+ 'INCLUDE_SPELLING': True,
+ 'BATCH_SIZE': 100,
+ 'EXCLUDED_INDEXES': ['thirdpartyapp.search_indexes.BarIndex'],
+ },
+ 'db': {
+ 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
+ 'EXCLUDED_INDEXES': ['thirdpartyapp.search_indexes.BarIndex'],
+ }
+ }
+
+No default for this setting is provided.
+
+The main keys (``default`` & friends) are identifiers for your application.
+You can use them any place the API exposes ``using`` as a method or kwarg.
+
+There must always be at least a ``default`` key within this setting.
+
+The ``ENGINE`` option is required for all backends & should point to the
+``BaseEngine`` subclass for the backend.
+
+Additionally, each backend may have additional options it requires:
+
+* Solr
+
+ * ``URL`` - The URL to the Solr core.
+
+* Whoosh
+
+ * ``PATH`` - The filesystem path to where the index data is located.
+
+* Xapian
+
+ * ``PATH`` - The filesystem path to where the index data is located.
+
+The following options are optional:
+
+* ``INCLUDE_SPELLING`` - Include spelling suggestions. Default is ``False``
+* ``BATCH_SIZE`` - How many records should be updated at once via the management
+ commands. Default is ``1000``.
+* ``TIMEOUT`` - (Solr and ElasticSearch) How long to wait (in seconds) before
+ the connection times out. Default is ``10``.
+* ``STORAGE`` - (Whoosh-only) Which storage engine to use. Accepts ``file`` or
+ ``ram``. Default is ``file``.
+* ``POST_LIMIT`` - (Whoosh-only) How large the file sizes can be. Default is
+ ``128 * 1024 * 1024``.
+* ``FLAGS`` - (Xapian-only) A list of flags to use when querying the index.
+* ``EXCLUDED_INDEXES`` - A list of strings (as Python import paths) to indexes
+ you do **NOT** want included. Useful for omitting third-party things you
+ don't want indexed or for when you want to replace an index.
+* ``KWARGS`` - (Solr and ElasticSearch) Any additional keyword arguments that
+ should be passed on to the underlying client library.
+
+
+``HAYSTACK_ROUTERS``
+====================
+
+**Optional**
+
+This setting controls how routing is performed to allow different backends to
+handle updates/deletes/reads.
+
+An example::
+
+ HAYSTACK_ROUTERS = ['search_routers.MasterSlaveRouter', 'haystack.routers.DefaultRouter']
+
+Defaults to ``['haystack.routers.DefaultRouter']``.
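+
+A router is simply a class with ``for_read`` and/or ``for_write`` methods that
+return a connection alias (or ``None`` to fall through to the next router). A
+minimal sketch of the ``MasterSlaveRouter`` referenced above, assuming
+``'master'`` & ``'slave'`` connections exist in ``HAYSTACK_CONNECTIONS`` (see
+:doc:`multiple_index` for full details)::
+
+    # search_routers.py
+    from haystack import routers
+
+
+    class MasterSlaveRouter(routers.BaseRouter):
+        def for_write(self, **hints):
+            # Send updates/deletes to the master connection.
+            return 'master'
+
+        def for_read(self, **hints):
+            # Serve searches from the slave connection.
+            return 'slave'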
+
+
+``HAYSTACK_SIGNAL_PROCESSOR``
+=============================
+
+**Optional**
+
+This setting controls what ``SignalProcessor`` class is used to handle Django's
+signals & keep the search index up-to-date.
+
+An example::
+
+ HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
+
+Defaults to ``'haystack.signals.BaseSignalProcessor'``.
+
+
+``HAYSTACK_DOCUMENT_FIELD``
+===========================
+
+**Optional**
+
+This setting controls what fieldname Haystack relies on as the default field
+for searching within.
+
+An example::
+
+ HAYSTACK_DOCUMENT_FIELD = 'wall_o_text'
+
+Defaults to ``text``.
+
+
+``HAYSTACK_SEARCH_RESULTS_PER_PAGE``
+====================================
+
+**Optional**
+
+This setting controls how many results are shown per page when using the
+included ``SearchView`` and its subclasses.
+
+An example::
+
+ HAYSTACK_SEARCH_RESULTS_PER_PAGE = 50
+
+Defaults to ``20``.
+
+
+``HAYSTACK_CUSTOM_HIGHLIGHTER``
+===============================
+
+**Optional**
+
+This setting allows you to specify your own custom ``Highlighter``
+implementation for use with the ``{% highlight %}`` template tag. It should be
+the full path to the class.
+
+An example::
+
+ HAYSTACK_CUSTOM_HIGHLIGHTER = 'myapp.utils.BorkHighlighter'
+
+No default is provided. Haystack automatically falls back to the default
+implementation.
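+
+A custom highlighter is typically a subclass of ``haystack.utils.Highlighter``
+that overrides ``render_html``. A minimal sketch (the replacement behavior here
+is purely illustrative; see :doc:`highlighting` for the full API)::
+
+    # myapp/utils.py
+    from haystack.utils import Highlighter
+
+
+    class BorkHighlighter(Highlighter):
+        def render_html(self, highlight_locations=None, start_offset=None, end_offset=None):
+            # Work on the excerpt window the base class selected.
+            highlighted_chunk = self.text_block[start_offset:end_offset]
+
+            for word in self.query_words:
+                # Instead of wrapping matches in a tag, replace them outright.
+                highlighted_chunk = highlighted_chunk.replace(word, 'Bork!')
+
+            return highlighted_chunk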
+
+
+``HAYSTACK_ITERATOR_LOAD_PER_QUERY``
+====================================
+
+**Optional**
+
+This setting controls the number of results that are pulled at once when
+iterating through a ``SearchQuerySet``. If you generally consume large portions
+at a time, you can bump this up for better performance.
+
+.. note::
+
+ This is not used in the case of a slice on a ``SearchQuerySet``, which
+ already overrides the number of results pulled at once.
+
+An example::
+
+ HAYSTACK_ITERATOR_LOAD_PER_QUERY = 100
+
+The default is 10 results at a time.
+
+
+``HAYSTACK_LIMIT_TO_REGISTERED_MODELS``
+=======================================
+
+**Optional**
+
+This setting allows you to control whether or not Haystack will limit the
+search results seen to just the models registered. It should be a boolean.
+
+If your search index is never used for anything other than the models
+registered with Haystack, you can turn this off and get a small to moderate
+performance boost.
+
+An example::
+
+ HAYSTACK_LIMIT_TO_REGISTERED_MODELS = False
+
+Default is ``True``.
+
+
+``HAYSTACK_ID_FIELD``
+=====================
+
+**Optional**
+
+This setting allows you to control the name of the unique field Haystack uses
+internally. It is rarely needed unless your field names collide with Haystack's
+defaults.
+
+An example::
+
+ HAYSTACK_ID_FIELD = 'my_id'
+
+Default is ``id``.
+
+
+``HAYSTACK_DJANGO_CT_FIELD``
+============================
+
+**Optional**
+
+This setting allows you to control the name of the content type field Haystack
+uses internally. It is rarely needed unless your field names collide with
+Haystack's defaults.
+
+An example::
+
+ HAYSTACK_DJANGO_CT_FIELD = 'my_django_ct'
+
+Default is ``django_ct``.
+
+
+``HAYSTACK_DJANGO_ID_FIELD``
+============================
+
+**Optional**
+
+This setting allows you to control the name of the primary key field Haystack
+uses internally. It is rarely needed unless your field names collide with
+Haystack's defaults.
+
+An example::
+
+ HAYSTACK_DJANGO_ID_FIELD = 'my_django_id'
+
+Default is ``django_id``.
+
+
+``HAYSTACK_IDENTIFIER_METHOD``
+==============================
+
+**Optional**
+
+This setting allows you to provide a custom method for
+``haystack.utils.get_identifier``. Useful when the default identifier
+pattern of ``<app_label>.<object_name>.<pk>`` isn't suited to your
+needs.
+
+An example::
+
+ HAYSTACK_IDENTIFIER_METHOD = 'my_app.module.get_identifier'
+
+Default is ``haystack.utils.default_get_identifier``.
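+
+The referenced callable receives either a model instance or an already-built
+identifier string. A rough sketch (``uuid`` is a hypothetical unique field on
+your models; error handling is elided)::
+
+    # my_app/module.py
+    def get_identifier(obj_or_string):
+        if not hasattr(obj_or_string, '_meta'):
+            # Assume a ready-made identifier string was passed through.
+            return obj_or_string
+
+        return "%s.%s.%s" % (obj_or_string._meta.app_label,
+                             obj_or_string._meta.model_name,
+                             obj_or_string.uuid)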
diff --git a/docs/signal_processors.rst b/docs/signal_processors.rst
new file mode 100644
index 0000000..3865b72
--- /dev/null
+++ b/docs/signal_processors.rst
@@ -0,0 +1,117 @@
+.. _ref-signal_processors:
+
+=================
+Signal Processors
+=================
+
+Keeping data in sync between the (authoritative) database & the
+(non-authoritative) search index is one of the more difficult problems when
+using Haystack. Even frequently running the ``update_index`` management command
+still introduces lag between when the data is stored & when it's available
+for searching.
+
+A solution to this is to incorporate Django's signals (specifically
+``models.db.signals.post_save`` & ``models.db.signals.post_delete``), which then
+trigger *individual* updates to the search index, keeping them in near-perfect
+sync.
+
+Older versions of Haystack (pre-v2.0) tied the ``SearchIndex`` directly to the
+signals, which caused occasional conflicts of interest with third-party
+applications.
+
+To solve this, starting with Haystack v2.0, the concept of a ``SignalProcessor``
+has been introduced. In its simplest form, the ``SignalProcessor`` listens
+to whatever signals are set up & can be configured to trigger the updates
+without having to change any ``SearchIndex`` code.
+
+.. warning::
+
+ Incorporating Haystack's ``SignalProcessor`` into your setup **will**
+ increase the overall load (CPU & perhaps I/O depending on configuration).
+ You will need to capacity plan for this & ensure you can make the tradeoff
+ of more real-time results for increased load.
+
+
+Default - ``BaseSignalProcessor``
+=================================
+
+The default setup is configured to use the
+``haystack.signals.BaseSignalProcessor`` class, which includes all the
+underlying code necessary to handle individual updates/deletes, **BUT DOES NOT
+HOOK UP THE SIGNALS**.
+
+This means that, by default, **NO ACTION IS TAKEN BY HAYSTACK** when a model is
+saved or deleted. The ``BaseSignalProcessor.setup`` &
+``BaseSignalProcessor.teardown`` methods are both empty to prevent anything
+from being set up at initialization time.
+
+This usage is configured very simply (again, by default) with the
+``HAYSTACK_SIGNAL_PROCESSOR`` setting. An example of manually setting this
+would look like::
+
+ HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor'
+
+This class forms an excellent base if you'd like to override/extend for more
+advanced behavior. Which leads us to...
+
+
+Realtime - ``RealtimeSignalProcessor``
+======================================
+
+The other included ``SignalProcessor`` is the
+``haystack.signals.RealtimeSignalProcessor`` class. It is an extremely thin
+extension of the ``BaseSignalProcessor`` class, differing only in that
+it implements the ``setup/teardown`` methods, tying **ANY** Model
+``save/delete`` to the signal processor.
+
+If the model has an associated ``SearchIndex``, the ``RealtimeSignalProcessor``
+will then trigger an update/delete of that model instance within the search
+index proper.
+
+Configuration looks like::
+
+ HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
+
+This causes **all** ``SearchIndex`` classes to work in a realtime fashion.
+
+.. note::
+
+    These updates happen in-process. If a request-response cycle is involved,
+    this may cause the user's browser to sit & wait for indexing to
+ be completed. Since this wait can be undesirable, especially under load,
+ you may wish to look into queued search options. See the
+ :ref:`ref-other_apps` documentation for existing options.
+
+
+Custom ``SignalProcessors``
+===========================
+
+The ``BaseSignalProcessor`` & ``RealtimeSignalProcessor`` classes are fairly
+simple/straightforward to customize or extend. Rather than forking Haystack to
+implement your modifications, you should create your own subclass within your
+codebase (anywhere that's importable is usually fine, though you should avoid
+``models.py`` files).
+
+For instance, if you only wanted ``User`` saves to be realtime, deferring all
+other updates to the management commands, you'd implement the following code::
+
+ from django.contrib.auth.models import User
+ from django.db import models
+ from haystack import signals
+
+
+ class UserOnlySignalProcessor(signals.BaseSignalProcessor):
+ def setup(self):
+ # Listen only to the ``User`` model.
+ models.signals.post_save.connect(self.handle_save, sender=User)
+ models.signals.post_delete.connect(self.handle_delete, sender=User)
+
+ def teardown(self):
+ # Disconnect only for the ``User`` model.
+ models.signals.post_save.disconnect(self.handle_save, sender=User)
+ models.signals.post_delete.disconnect(self.handle_delete, sender=User)
+
+For other customizations (modifying how saves/deletes should work), you'll need
+to override/extend the ``handle_save/handle_delete`` methods. The source code
+is your best option for referring to how things currently work on your version
+of Haystack.
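+
+For instance, a minimal sketch that skips indexing for unpublished objects
+(``is_published`` is a hypothetical flag on your models)::
+
+    from haystack import signals
+
+
+    class PublishedOnlySignalProcessor(signals.RealtimeSignalProcessor):
+        def handle_save(self, sender, instance, **kwargs):
+            if getattr(instance, 'is_published', True):
+                # Defer to the stock behavior, which routes the update to the
+                # appropriate connection(s) & ``SearchIndex``.
+                super(PublishedOnlySignalProcessor, self).handle_save(sender, instance, **kwargs)
+            else:
+                # Treat unpublished objects as if they had been deleted.
+                self.handle_delete(sender, instance, **kwargs)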
diff --git a/docs/spatial.rst b/docs/spatial.rst
new file mode 100644
index 0000000..07dda7e
--- /dev/null
+++ b/docs/spatial.rst
@@ -0,0 +1,412 @@
+.. _ref-spatial:
+
+==============
+Spatial Search
+==============
+
+Spatial search (also called geospatial search) allows you to take data that
+has a geographic location & enhance the search results by limiting them to a
+physical area. Haystack, combined with the latest versions of a couple engines,
+can provide this type of search.
+
+In addition, Haystack tries to implement these features in a way that is as
+close to GeoDjango_ as possible. There are some differences, which we'll
+highlight throughout this guide. Additionally, while the support isn't as
+comprehensive as PostGIS (for example), it is still quite useful.
+
+.. _GeoDjango: http://geodjango.org/
+
+
+Additional Requirements
+=======================
+
+The spatial functionality has only one additional dependency, which is neither
+included with Haystack nor available in Django:
+
+* ``geopy`` - ``pip install geopy``
+
+If you do not ever need distance information, you may be able to skip
+installing ``geopy``.
+
+
+Support
+=======
+
+You need the latest & greatest of either Solr or Elasticsearch. None of the
+other backends (specifically the engines) support this kind of search.
+
+For Solr_, you'll need at least **v3.5+**. In addition, if you have an existing
+install of Haystack & Solr, you'll need to upgrade the schema & reindex your
+data. If you're adding geospatial data, you would have to reindex anyhow.
+
+For Elasticsearch, you'll need at least v0.17.7, preferably v0.18.6 or better.
+If you're adding geospatial data, you'll have to reindex as well.
+
+.. _Solr: http://lucene.apache.org/solr/
+
+====================== ====== =============== ======== ======== ======
+Lookup Type Solr Elasticsearch Whoosh Xapian Simple
+====================== ====== =============== ======== ======== ======
+`within` X X
+`dwithin` X X
+`distance` X X
+`order_by('distance')` X X
+`polygon` X
+====================== ====== =============== ======== ======== ======
+
+For more details, you can inspect http://wiki.apache.org/solr/SpatialSearch
+or http://www.elasticsearch.org/guide/reference/query-dsl/geo-bounding-box-filter.html.
+
+
+Geospatial Assumptions
+======================
+
+``Points``
+----------
+
+Haystack prefers to work with ``Point`` objects, which are located in
+``django.contrib.gis.geos.Point`` but conveniently importable out of
+``haystack.utils.geo.Point``.
+
+``Point`` objects use **LONGITUDE, LATITUDE** for their construction, regardless
+of whether you instantiate them with positional parameters or with
+WKT_/``GEOSGeometry``.
+
+.. _WKT: http://en.wikipedia.org/wiki/Well-known_text
+
+Examples::
+
+ # Using positional arguments.
+ from haystack.utils.geo import Point
+ pnt = Point(-95.23592948913574, 38.97127105172941)
+
+ # Using WKT.
+ from django.contrib.gis.geos import GEOSGeometry
+ pnt = GEOSGeometry('POINT(-95.23592948913574 38.97127105172941)')
+
+They are preferred over just providing ``latitude, longitude`` because they are
+more intelligent, have a spatial reference system attached & are more consistent
+with GeoDjango's use.
+
+
+``Distance``
+------------
+
+Haystack also uses the ``D`` (or ``Distance``) objects from GeoDjango,
+implemented in ``django.contrib.gis.measure.Distance`` but conveniently
+importable out of ``haystack.utils.geo.D`` (or ``haystack.utils.geo.Distance``).
+
+``Distance`` objects accept a very flexible set of measurements during
+instantiation and can convert amongst them freely. This is important, because
+the engines rely on measurements being in kilometers but you're free to use
+whatever units you want.
+
+Examples::
+
+ from haystack.utils.geo import D
+
+ # Start at 5 miles.
+ imperial_d = D(mi=5)
+
+ # Convert to fathoms...
+ fathom_d = imperial_d.fathom
+
+ # Now to kilometers...
+ km_d = imperial_d.km
+
+ # And back to miles.
+ mi = imperial_d.mi
+
+They are preferred over just providing a raw distance because they are
+more intelligent, have a well-defined unit system attached & are consistent
+with GeoDjango's use.
+
+
+``WGS-84``
+----------
+
+All engines assume WGS-84 (SRID 4326). At the time of writing, there does **not**
+appear to be a way to switch this. Haystack will transform all points into this
+coordinate system for you.
+
+
+Indexing
+========
+
+Indexing is relatively simple. Simply add a ``LocationField`` (or several)
+onto your ``SearchIndex`` class(es) & provide them a ``Point`` object. For
+example::
+
+ from haystack import indexes
+ from shops.models import Shop
+
+
+ class ShopIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ # ... the usual, then...
+ location = indexes.LocationField(model_attr='coordinates')
+
+ def get_model(self):
+ return Shop
+
+If you must manually prepare the data, you have to do something slightly less
+convenient, returning a string-ified version of the coordinates in WGS-84 as
+``lat,long``::
+
+ from haystack import indexes
+ from shops.models import Shop
+
+
+ class ShopIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ # ... the usual, then...
+ location = indexes.LocationField()
+
+ def get_model(self):
+ return Shop
+
+ def prepare_location(self, obj):
+ # If you're just storing the floats...
+ return "%s,%s" % (obj.latitude, obj.longitude)
+
+Alternatively, you could build a method/property onto the ``Shop`` model that
+returns a ``Point`` based on those coordinates::
+
+ # shops/models.py
+ from django.contrib.gis.geos import Point
+ from django.db import models
+
+
+ class Shop(models.Model):
+ # ... the usual, then...
+ latitude = models.FloatField()
+ longitude = models.FloatField()
+
+ # Usual methods, then...
+ def get_location(self):
+ # Remember, longitude FIRST!
+ return Point(self.longitude, self.latitude)
+
+
+ # shops/search_indexes.py
+ from haystack import indexes
+ from shops.models import Shop
+
+
+ class ShopIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ location = indexes.LocationField(model_attr='get_location')
+
+ def get_model(self):
+ return Shop
+
+
+Querying
+========
+
+There are two types of geospatial queries you can run, ``within`` & ``dwithin``.
+Like their GeoDjango counterparts (within_ & dwithin_), these methods focus on
+finding results within an area.
+
+.. _within: https://docs.djangoproject.com/en/dev/ref/contrib/gis/geoquerysets/#within
+.. _dwithin: https://docs.djangoproject.com/en/dev/ref/contrib/gis/geoquerysets/#dwithin
+
+
+``within``
+----------
+
+.. method:: SearchQuerySet.within(self, field, point_1, point_2)
+
+``within`` is a bounding box comparison. A bounding box is a rectangular area
+within which to search. It's composed of a bottom-left point & a top-right
+point. It is faster but slightly sloppier than its counterpart.
+
+Examples::
+
+ from haystack.query import SearchQuerySet
+ from haystack.utils.geo import Point
+
+ downtown_bottom_left = Point(-95.23947, 38.9637903)
+ downtown_top_right = Point(-95.23362278938293, 38.973081081164715)
+
+ # 'location' is the fieldname from our ``SearchIndex``...
+
+ # Do the bounding box query.
+ sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right)
+
+ # Can be chained with other Haystack calls.
+ sqs = SearchQuerySet().auto_query('coffee').within('location', downtown_bottom_left, downtown_top_right).order_by('-popularity')
+
+.. note::
+
+ In GeoDjango, assuming the ``Shop`` model had been properly geo-ified, this
+ would have been implemented as::
+
+ from shops.models import Shop
+ Shop.objects.filter(location__within=(downtown_bottom_left, downtown_top_right))
+
+ Haystack's form differs because it yielded a cleaner implementation, was
+ no more typing than the GeoDjango version & tried to maintain the same
+ terminology/similar signature.
+
+
+``dwithin``
+-----------
+
+.. method:: SearchQuerySet.dwithin(self, field, point, distance)
+
+``dwithin`` is a radius-based search. A radius-based search is a circular area
+within which to search. It's composed of a center point & a radius (in
+kilometers, though Haystack will use the ``D`` object's conversion utilities to
+get it there). It is slower than ``within`` but very exact & can involve fewer
+calculations on your part.
+
+Examples::
+
+ from haystack.query import SearchQuerySet
+ from haystack.utils.geo import Point, D
+
+ ninth_and_mass = Point(-95.23592948913574, 38.96753407043678)
+    # Within two miles.
+ max_dist = D(mi=2)
+
+ # 'location' is the fieldname from our ``SearchIndex``...
+
+ # Do the radius query.
+ sqs = SearchQuerySet().dwithin('location', ninth_and_mass, max_dist)
+
+ # Can be chained with other Haystack calls.
+ sqs = SearchQuerySet().auto_query('coffee').dwithin('location', ninth_and_mass, max_dist).order_by('-popularity')
+
+.. note::
+
+ In GeoDjango, assuming the ``Shop`` model had been properly geo-ified, this
+ would have been implemented as::
+
+ from shops.models import Shop
+ Shop.objects.filter(location__dwithin=(ninth_and_mass, D(mi=2)))
+
+ Haystack's form differs because it yielded a cleaner implementation, was
+ no more typing than the GeoDjango version & tried to maintain the same
+ terminology/similar signature.
+
+
+``distance``
+------------
+
+.. method:: SearchQuerySet.distance(self, field, point)
+
+By default, search results will come back without distance information attached
+to them. With a bounding box query, it would be ambiguous what the distances
+should be calculated against, and computing them adds work that may not be
+necessary.
+
+So like GeoDjango, Haystack exposes a method to signify that you want to
+include these calculated distances on results.
+
+Examples::
+
+ from haystack.query import SearchQuerySet
+ from haystack.utils.geo import Point, D
+
+ ninth_and_mass = Point(-95.23592948913574, 38.96753407043678)
+
+ # On a bounding box...
+ downtown_bottom_left = Point(-95.23947, 38.9637903)
+ downtown_top_right = Point(-95.23362278938293, 38.973081081164715)
+
+ sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right).distance('location', ninth_and_mass)
+
+ # ...Or on a radius query.
+ sqs = SearchQuerySet().dwithin('location', ninth_and_mass, D(mi=2)).distance('location', ninth_and_mass)
+
+You can even measure distance against a different point, for instance if you
+query against key, well-cached hotspots in town but want distances measured
+from the user's current position::
+
+ from haystack.query import SearchQuerySet
+ from haystack.utils.geo import Point, D
+
+ ninth_and_mass = Point(-95.23592948913574, 38.96753407043678)
+ user_loc = Point(-95.23455619812012, 38.97240128290697)
+
+ sqs = SearchQuerySet().dwithin('location', ninth_and_mass, D(mi=2)).distance('location', user_loc)
+
+.. note::
+
+ The astute will notice this is Haystack's biggest departure from GeoDjango.
+ In GeoDjango, this would have been implemented as::
+
+ from shops.models import Shop
+ Shop.objects.filter(location__dwithin=(ninth_and_mass, D(mi=2))).distance(user_loc)
+
+ Note that, by default, the GeoDjango form leaves *out* the field to be
+    calculated against (though it's possible to override it & specify the
+ field).
+
+ Haystack's form differs because the same assumptions are difficult to make.
+ GeoDjango deals with a single model at a time, where Haystack deals with
+ a broad mix of models. Additionally, accessing ``Model`` information is a
+ couple hops away, so Haystack favors the explicit (if slightly more typing)
+ approach.
+
+
+Ordering
+========
+
+Because you're dealing with search, even with geospatial queries, results still
+come back in **RELEVANCE** order. If you want to offer the user ordering
+results by distance, there's a simple way to enable this ordering.
+
+Using the standard Haystack ``order_by`` method, if you specify ``distance`` or
+``-distance`` **ONLY**, you'll get geographic ordering. Additionally, you must
+have a call to ``.distance()`` somewhere in the chain, otherwise there is no
+distance information on the results & nothing to sort by.
+
+Examples::
+
+ from haystack.query import SearchQuerySet
+ from haystack.utils.geo import Point, D
+
+ ninth_and_mass = Point(-95.23592948913574, 38.96753407043678)
+ downtown_bottom_left = Point(-95.23947, 38.9637903)
+ downtown_top_right = Point(-95.23362278938293, 38.973081081164715)
+
+ # Non-geo ordering.
+ sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right).order_by('title')
+ sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right).distance('location', ninth_and_mass).order_by('-created')
+
+ # Geo ordering, closest to farthest.
+ sqs = SearchQuerySet().within('location', downtown_bottom_left, downtown_top_right).distance('location', ninth_and_mass).order_by('distance')
+ # Geo ordering, farthest to closest.
+ sqs = SearchQuerySet().dwithin('location', ninth_and_mass, D(mi=2)).distance('location', ninth_and_mass).order_by('-distance')
+
+.. note::
+
+ This call is identical to the GeoDjango usage.
+
+.. warning::
+
+    You cannot specify both distance & lexicographic ordering. If you specify
+ more than just ``distance`` or ``-distance``, Haystack assumes ``distance``
+ is a field in the index & tries to sort on it. Example::
+
+ # May blow up!
+ sqs = SearchQuerySet().dwithin('location', ninth_and_mass, D(mi=2)).distance('location', ninth_and_mass).order_by('distance', 'title')
+
+ This is a limitation in the engine's implementation.
+
+ If you actually **have** a field called ``distance`` (& aren't using
+ calculated distance information), Haystack will do the right thing in
+ these circumstances.
+
+
+Caveats
+=======
+
+In all cases, you may call the ``within/dwithin/distance`` methods as many times
+as you like. However, the **LAST** call is the information that will be used.
+No combination logic is available, as this is largely a backend limitation.
+
+Combining calls to both ``within`` & ``dwithin`` may yield unexpected or broken
+results. They don't overlap when performing queries, so it may be possible to
+construct queries that work. Your Mileage May Vary.
diff --git a/docs/templatetags.rst b/docs/templatetags.rst
new file mode 100644
index 0000000..71d6e08
--- /dev/null
+++ b/docs/templatetags.rst
@@ -0,0 +1,68 @@
+.. _ref-templatetags:
+
+=============
+Template Tags
+=============
+
+Haystack comes with a couple common template tags to make using some of its
+special features available to templates.
+
+
+``highlight``
+=============
+
+Takes a block of text and highlights words from a provided query within that
+block of text. Optionally accepts arguments to provide the HTML tag to wrap
+highlighted word in, a CSS class to use with the tag and a maximum length of
+the blurb in characters.
+
+The defaults are ``span`` for the HTML tag, ``highlighted`` for the CSS class
+and 200 characters for the excerpt.
+
+Syntax::
+
+ {% highlight <text_block> with <query> [css_class "class_name"] [html_tag "span"] [max_length 200] %}
+
+Example::
+
+ # Highlight summary with default behavior.
+ {% highlight result.summary with query %}
+
+ # Highlight summary but wrap highlighted words with a div and the
+ # following CSS class.
+ {% highlight result.summary with query html_tag "div" css_class "highlight_me_please" %}
+
+    # Highlight summary but only show 40 characters.
+ {% highlight result.summary with query max_length 40 %}
+
+The highlighter used by this tag can be overridden as needed. See the
+:doc:`highlighting` documentation for more information.
+
+
+``more_like_this``
+==================
+
+Fetches similar items from the search index to find content that is similar
+to the provided model's content.
+
+.. note::
+
+ This requires a backend that has More Like This built-in.
+
+Syntax::
+
+ {% more_like_this model_instance as varname [for app_label.model_name,app_label.model_name,...] [limit n] %}
+
+Example::
+
+ # Pull a full SearchQuerySet (lazy loaded) of similar content.
+ {% more_like_this entry as related_content %}
+
+ # Pull just the top 5 similar pieces of content.
+ {% more_like_this entry as related_content limit 5 %}
+
+ # Pull just the top 5 similar entries or comments.
+ {% more_like_this entry as related_content for "blog.entry,comments.comment" limit 5 %}
+
+This tag behaves exactly like ``SearchQuerySet.more_like_this``, so all notes in
+that regard apply here as well.
diff --git a/docs/toc.rst b/docs/toc.rst
new file mode 100644
index 0000000..46ed9bb
--- /dev/null
+++ b/docs/toc.rst
@@ -0,0 +1,53 @@
+Table Of Contents
+=================
+
+.. toctree::
+ :maxdepth: 2
+
+ index
+ tutorial
+ glossary
+ views_and_forms
+ templatetags
+ management_commands
+ architecture_overview
+ backend_support
+ installing_search_engines
+ settings
+ faq
+ who_uses
+ other_apps
+ debugging
+
+ migration_from_1_to_2
+ python3
+ contributing
+
+ best_practices
+ highlighting
+ faceting
+ autocomplete
+ boost
+ signal_processors
+ multiple_index
+ rich_content_extraction
+ spatial
+
+ searchqueryset_api
+ searchindex_api
+ inputtypes
+ searchfield_api
+ searchresult_api
+ searchquery_api
+ searchbackend_api
+
+ running_tests
+ creating_new_backends
+ utils
+
+
+Indices and tables
+==================
+
+* :ref:`search`
+
diff --git a/docs/tutorial.rst b/docs/tutorial.rst
new file mode 100644
index 0000000..76b0388
--- /dev/null
+++ b/docs/tutorial.rst
@@ -0,0 +1,398 @@
+.. _ref-tutorial:
+
+=============================
+Getting Started with Haystack
+=============================
+
+Search is a topic of ever-increasing importance. Users increasingly rely on
+search to separate signal from noise and find what they're looking for quickly.
+In addition, search can provide insight into what things are popular (many
+searches), what things are difficult to find on the site and ways you can
+improve the site.
+
+To this end, Haystack tries to make integrating custom search as easy as
+possible while being flexible/powerful enough to handle more advanced use cases.
+
+Haystack is a reusable app (that is, it relies only on its own code and focuses
+on providing just search) that plays nicely with both apps you control and
+third-party apps (such as ``django.contrib.*``) without having to modify their
+sources.
+
+Haystack also provides pluggable backends (much like Django's database
+layer), so virtually all of the code you write ought to be portable between
+whichever search engine you choose.
+
+.. note::
+
+ If you hit a stumbling block, there is both a `mailing list`_ and
+ `#haystack on irc.freenode.net`_ to get help.
+
+.. note::
+
+ You can participate in and/or track the development of Haystack by
+ subscribing to the `development mailing list`_.
+
+.. _mailing list: http://groups.google.com/group/django-haystack
+.. _#haystack on irc.freenode.net: irc://irc.freenode.net/haystack
+.. _development mailing list: http://groups.google.com/group/django-haystack-dev
+
+This tutorial assumes that you have a basic familiarity with the various major
+parts of Django (models/forms/views/settings/URLconfs) and is tailored to the
+typical use case. There are shortcuts available as well as hooks for much
+more advanced setups, but those will not be covered here.
+
+For example purposes, we'll be adding search functionality to a simple
+note-taking application. Here is ``myapp/models.py``::
+
+ from django.db import models
+ from django.contrib.auth.models import User
+
+
+ class Note(models.Model):
+ user = models.ForeignKey(User)
+ pub_date = models.DateTimeField()
+ title = models.CharField(max_length=200)
+ body = models.TextField()
+
+ def __unicode__(self):
+ return self.title
+
+Finally, before starting with Haystack, you will want to choose a search
+backend to get started. There is a quick-start guide to
+:doc:`installing_search_engines`, though you may want to defer to each engine's
+official instructions.
+
+
+Installation
+=============
+
+Use your favorite Python package manager to install the app from PyPI, e.g.
+
+Example::
+
+ pip install django-haystack
+
+
+Configuration
+=============
+
+Add Haystack To ``INSTALLED_APPS``
+----------------------------------
+
+As with most Django applications, you should add Haystack to the
+``INSTALLED_APPS`` within your settings file (usually ``settings.py``).
+
+Example::
+
+ INSTALLED_APPS = [
+ 'django.contrib.admin',
+ 'django.contrib.auth',
+ 'django.contrib.contenttypes',
+ 'django.contrib.sessions',
+ 'django.contrib.sites',
+
+ # Added.
+ 'haystack',
+
+ # Then your usual apps...
+ 'blog',
+ ]
+
+
+Modify Your ``settings.py``
+---------------------------
+
+Within your ``settings.py``, you'll need to add a setting to indicate where your
+site configuration file will live and which backend to use, as well as other
+settings for that backend.
+
+``HAYSTACK_CONNECTIONS`` is a required setting and should be at least one of
+the following:
+
+Solr
+~~~~
+
+Example::
+
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.solr_backend.SolrEngine',
+ 'URL': 'http://127.0.0.1:8983/solr'
+ # ...or for multicore...
+ # 'URL': 'http://127.0.0.1:8983/solr/mysite',
+ },
+ }
+
+
+Elasticsearch
+~~~~~~~~~~~~~
+
+Example::
+
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
+ 'URL': 'http://127.0.0.1:9200/',
+ 'INDEX_NAME': 'haystack',
+ },
+ }
+
+
+Whoosh
+~~~~~~
+
+Requires setting ``PATH`` to the place on your filesystem where the
+Whoosh index should be located. Standard warnings about permissions and keeping
+it out of a place your webserver may serve documents out of apply.
+
+Example::
+
+ import os
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.whoosh_backend.WhooshEngine',
+ 'PATH': os.path.join(os.path.dirname(__file__), 'whoosh_index'),
+ },
+ }
+
+
+Xapian
+~~~~~~
+
+First, install the Xapian backend (via
+http://github.com/notanumber/xapian-haystack/tree/master) per the instructions
+included with the backend.
+
+Requires setting ``PATH`` to the place on your filesystem where the
+Xapian index should be located. Standard warnings about permissions and keeping
+it out of a place your webserver may serve documents out of apply.
+
+Example::
+
+ import os
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'xapian_backend.XapianEngine',
+ 'PATH': os.path.join(os.path.dirname(__file__), 'xapian_index'),
+ },
+ }
+
+
+Simple
+~~~~~~
+
+The ``simple`` backend uses very basic matching via the database itself. It's
+not recommended for production use but it will return results.
+
+.. warning::
+
+ This backend does *NOT* work like the other backends do. Data preparation
+ does nothing & advanced filtering calls do not work. You really probably
+ don't want this unless you're in an environment where you just want to
+ silence Haystack.
+
+Example::
+
+ HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
+ },
+ }
+
+
+Handling Data
+=============
+
+Creating ``SearchIndexes``
+--------------------------
+
+``SearchIndex`` objects are the way Haystack determines what data should be
+placed in the search index and handles the flow of data in. You can think of
+them as being similar to Django ``Models`` or ``Forms`` in that they are
+field-based and manipulate/store data.
+
+You generally create a unique ``SearchIndex`` for each type of ``Model`` you
+wish to index, though you can reuse the same ``SearchIndex`` between different
+models if you take care in doing so and your field names are very standardized.
+
+To build a ``SearchIndex``, all that's necessary is to subclass both
+``indexes.SearchIndex`` & ``indexes.Indexable``,
+define the fields you want to store data with and define a ``get_model`` method.
+
+We'll create the following ``NoteIndex`` to correspond to our ``Note``
+model. This code generally goes in a ``search_indexes.py`` file within the app
+it applies to, though that is not required. This allows
+Haystack to automatically pick it up. The ``NoteIndex`` should look like::
+
+ import datetime
+ from haystack import indexes
+ from myapp.models import Note
+
+
+ class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ author = indexes.CharField(model_attr='user')
+ pub_date = indexes.DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+ def index_queryset(self, using=None):
+ """Used when the entire index for model is updated."""
+ return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now())
+
+Every ``SearchIndex`` requires there be one (and only one) field with
+``document=True``. This indicates to both Haystack and the search engine which
+field is the primary field for searching within.
+
+.. warning::
+
+ When you choose a ``document=True`` field, it should be consistently named
+ across all of your ``SearchIndex`` classes to avoid confusing the backend.
+ The convention is to name this field ``text``.
+
+ There is nothing special about the ``text`` field name used in all of the
+ examples. It could be anything; you could call it ``pink_polka_dot`` and
+ it won't matter. It's simply a convention to call it ``text``.
+
+Additionally, we're providing ``use_template=True`` on the ``text`` field. This
+allows us to use a data template (rather than error-prone concatenation) to
+build the document the search engine will index. You'll need to
+create a new template inside your template directory called
+``search/indexes/myapp/note_text.txt`` and place the following inside::
+
+ {{ object.title }}
+ {{ object.user.get_full_name }}
+ {{ object.body }}
+
+In addition, we added several other fields (``author`` and ``pub_date``). These
+are useful when you want to provide additional filtering options. Haystack comes
+with a variety of ``SearchField`` classes to handle most types of data.
+
+A common theme is to allow admin users to add future content but have it not
+display on the site until that future date is reached. We specify a custom
+``index_queryset`` method to prevent those future items from being indexed.
+
+.. _Django admin site: http://docs.djangoproject.com/en/dev/ref/contrib/admin/
+
+
+Setting Up The Views
+====================
+
+Add The ``SearchView`` To Your URLconf
+--------------------------------------
+
+Within your URLconf, add the following line::
+
+ (r'^search/', include('haystack.urls')),
+
+This will pull in the default URLconf for Haystack. It consists of a single
+URLconf that points to a ``SearchView`` instance. You can change this class's
+behavior by passing it any of several keyword arguments or override it entirely
+with your own view.
+
+
+Search Template
+---------------
+
+Your search template (``search/search.html`` for the default case) will likely
+be very simple. The following is enough to get going (your template/block names
+will likely differ)::
+
+ {% extends 'base.html' %}
+
+ {% block content %}
+ <h2>Search</h2>
+
+ <form method="get" action=".">
+ <table>
+ {{ form.as_table }}
+ <tr>
+ <td>&nbsp;</td>
+ <td>
+ <input type="submit" value="Search">
+ </td>
+ </tr>
+ </table>
+
+ {% if query %}
+ <h3>Results</h3>
+
+ {% for result in page.object_list %}
+ <p>
+ <a href="{{ result.object.get_absolute_url }}">{{ result.object.title }}</a>
+ </p>
+ {% empty %}
+ <p>No results found.</p>
+ {% endfor %}
+
+ {% if page.has_previous or page.has_next %}
+ <div>
+ {% if page.has_previous %}<a href="?q={{ query }}&amp;page={{ page.previous_page_number }}">{% endif %}&laquo; Previous{% if page.has_previous %}</a>{% endif %}
+ |
+ {% if page.has_next %}<a href="?q={{ query }}&amp;page={{ page.next_page_number }}">{% endif %}Next &raquo;{% if page.has_next %}</a>{% endif %}
+ </div>
+ {% endif %}
+ {% else %}
+ {# Show some example queries to run, maybe query syntax, something else? #}
+ {% endif %}
+ </form>
+ {% endblock %}
+
+Note that the ``page.object_list`` is actually a list of ``SearchResult``
+objects instead of individual models. These objects have all the data returned
+from that record within the search index as well as the score. They can also
+directly access the model for the result via ``{{ result.object }}``. So the
+``{{ result.object.title }}`` uses the actual ``Note`` object in the database
+and accesses its ``title`` field.
+
+
+Reindex
+-------
+
+The final step, now that you have everything set up, is to put the data from
+your database into the search index. Haystack ships with a management
+command to make this process easy.
+
+.. note::
+
+ If you're using the Solr backend, you have an extra step. Solr's
+ configuration is XML-based, so you'll need to manually regenerate the
+ schema. You should run
+ ``./manage.py build_solr_schema`` first, drop the XML output in your
+ Solr's ``schema.xml`` file and restart your Solr server.
+
+Simply run ``./manage.py rebuild_index``. You'll get some totals of how many
+models were processed and placed in the index.
+
+.. note::
+
+ Using the standard ``SearchIndex``, your search index content is only
+ updated whenever you run either ``./manage.py update_index`` or start
+ afresh with ``./manage.py rebuild_index``.
+
+ You should cron up a ``./manage.py update_index`` job at whatever interval
+ works best for your site (using ``--age=<num_hours>`` reduces the number of
+ things to update).
+
+ Alternatively, if you have low traffic and/or your search engine can handle
+ it, the ``RealtimeSignalProcessor`` automatically handles updates/deletes
+ for you.
+
+
+Complete!
+=========
+
+You can now visit the search section of your site, enter a search query and
+receive search results back for the query! Congratulations!
+
+
+What's Next?
+============
+
+This tutorial just scratches the surface of what Haystack provides. The
+``SearchQuerySet`` is the underpinning of all search in Haystack and provides
+a powerful, ``QuerySet``-like API (see :ref:`ref-searchqueryset-api`). You can
+use much more complicated ``SearchForms``/``SearchViews`` to give users a better
+UI (see :ref:`ref-views-and_forms`). And the :ref:`ref-best-practices` provides
+insight into non-obvious or advanced usages of Haystack.
diff --git a/docs/utils.rst b/docs/utils.rst
new file mode 100644
index 0000000..7d42fc5
--- /dev/null
+++ b/docs/utils.rst
@@ -0,0 +1,18 @@
+.. _ref-utils:
+
+=========
+Utilities
+=========
+
+Included here are some of the general use bits included with Haystack.
+
+
+``get_identifier``
+------------------
+
+.. function:: get_identifier(obj_or_string)
+
+Gets an unique identifier for the object or a string representing the
+object.
+
+If not overridden, uses ``<app_label>.<object_name>.<pk>``.
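+
+Example (assuming the ``Note`` model from the tutorial)::
+
+    from haystack.utils import get_identifier
+    from myapp.models import Note
+
+    note = Note.objects.get(pk=1)
+    get_identifier(note)            # 'myapp.note.1'
+    get_identifier('myapp.note.1')  # Well-formed strings pass through as-is.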
diff --git a/docs/views_and_forms.rst b/docs/views_and_forms.rst
new file mode 100644
index 0000000..b4c7697
--- /dev/null
+++ b/docs/views_and_forms.rst
@@ -0,0 +1,408 @@
+.. _ref-views-and_forms:
+
+=============
+Views & Forms
+=============
+
+.. note::
+
+    As of version 2.4, ``haystack.views.SearchView`` is deprecated in favor of
+    the new generic ``haystack.generic_views.SearchView``, which uses the
+    standard Django `class-based views`_ available in every version of Django
+    supported by Haystack.
+
+.. _class-based views: https://docs.djangoproject.com/en/1.7/topics/class-based-views/
+
+Haystack comes with some default, simple views & forms as well as some
+django-style views to help you get started and to cover the common cases.
+Included is a way to provide:
+
+ * Basic, query-only search.
+ * Search by models.
+ * Search with basic highlighted results.
+ * Faceted search.
+ * Search by models with basic highlighted results.
+
+Most processing is done by the forms provided by Haystack via the ``search``
+method. As a result, all but the faceted types (see :doc:`faceting`) use the
+standard ``SearchView``.
+
+There is very little coupling between the forms & the views (other than relying
+on the existence of a ``search`` method on the form), so you may interchangeably
+use forms and/or views anywhere within your own code.
+
+Forms
+=====
+
+.. currentmodule:: haystack.forms
+
+``SearchForm``
+--------------
+
+The most basic of the form types, this form consists of a single field, the
+``q`` field (for query). Upon searching, the form will take the cleaned contents
+of the ``q`` field and perform an ``auto_query`` on either the custom
+``SearchQuerySet`` you provide or off a default ``SearchQuerySet``.
+
+To customize the ``SearchQuerySet`` the form will use, pass it a
+``searchqueryset`` parameter to the constructor with the ``SearchQuerySet``
+you'd like to use. If using this form in conjunction with a ``SearchView``,
+the form will receive whatever ``SearchQuerySet`` you provide to the view with
+no additional work needed.
+
+The ``SearchForm`` also accepts a ``load_all`` parameter (``True`` or
+``False``), which determines how the database is queried when iterating through
+the results. This also is received automatically from the ``SearchView``.
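+
+For example, a minimal sketch of using the form on its own, outside of a
+``SearchView`` (the pre-filtered ``SearchQuerySet`` and the ``request`` object
+in scope are assumptions for illustration)::
+
+    from haystack.forms import SearchForm
+    from haystack.query import SearchQuerySet
+
+    sqs = SearchQuerySet().filter(author='john')
+    form = SearchForm(request.GET, searchqueryset=sqs, load_all=True)
+
+    if form.is_valid():
+        results = form.search()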
+
+All other forms in Haystack inherit (either directly or indirectly) from this
+form.
+
+``HighlightedSearchForm``
+-------------------------
+
+Identical to the ``SearchForm`` except that it tags the ``highlight`` method on
+to the end of the ``SearchQuerySet`` to enable highlighted results.
+
+``ModelSearchForm``
+-------------------
+
+This form adds new fields to the ``SearchForm``. It iterates through all registered models for
+the current ``SearchSite`` and provides a checkbox for each one. If no models
+are selected, all types will show up in the results.
+
+``HighlightedModelSearchForm``
+------------------------------
+
+Identical to the ``ModelSearchForm`` except that it tags the ``highlight``
+method on to the end of the ``SearchQuerySet`` to enable highlighted results on
+the selected models.
+
+``FacetedSearchForm``
+---------------------
+
+Identical to the ``SearchForm`` except that it adds a hidden ``selected_facets``
+field onto the form, allowing the form to narrow the results based on the facets
+chosen by the user.
+
+Creating Your Own Form
+----------------------
+
+The simplest way to go about creating your own form is to inherit from
+``SearchForm`` (or the desired parent) and extend the ``search`` method. By
+doing this, you save yourself most of the work of handling data correctly and
+stay API compatible with the ``SearchView``.
+
+For example, let's say you're providing search with a user-selectable date
+range. You might create a form that looks as follows::
+
+ from django import forms
+ from haystack.forms import SearchForm
+
+
+ class DateRangeSearchForm(SearchForm):
+ start_date = forms.DateField(required=False)
+ end_date = forms.DateField(required=False)
+
+ def search(self):
+ # First, store the SearchQuerySet received from other processing.
+ sqs = super(DateRangeSearchForm, self).search()
+
+ if not self.is_valid():
+ return self.no_query_found()
+
+ # Check to see if a start_date was chosen.
+ if self.cleaned_data['start_date']:
+ sqs = sqs.filter(pub_date__gte=self.cleaned_data['start_date'])
+
+ # Check to see if an end_date was chosen.
+ if self.cleaned_data['end_date']:
+ sqs = sqs.filter(pub_date__lte=self.cleaned_data['end_date'])
+
+ return sqs
+
+This form adds two new fields for (optionally) choosing the start and end dates.
+Within the ``search`` method, we grab the results from the parent form's
+processing. Then, if a user has selected a start and/or end date, we apply that
+filtering. Finally, we simply return the ``SearchQuerySet``.
+
+Views
+=====
+
+.. currentmodule:: haystack.views
+
+.. note::
+
+    As of version 2.4, the views in ``haystack.views.SearchView`` are deprecated
+    in favor of the new generic views in ``haystack.generic_views.SearchView``,
+    which use the standard Django `class-based views`_ available in every
+    version of Django supported by Haystack.
+
+.. _class-based views: https://docs.djangoproject.com/en/1.7/topics/class-based-views/
+
+New Django Class Based Views
+----------------------------
+
+ .. versionadded:: 2.4.0
+
+The views in ``haystack.generic_views.SearchView`` inherit from Django’s standard
+`FormView <https://docs.djangoproject.com/en/1.7/ref/class-based-views/generic-editing/#formview>`_.
+The example views can be customized like any other Django class-based view as
+demonstrated in this example, which filters the search results in ``get_queryset``::
+
+ # views.py
+ from datetime import date
+
+ from haystack.generic_views import SearchView
+
+ class MySearchView(SearchView):
+ """My custom search view."""
+
+ def get_queryset(self):
+ queryset = super(MySearchView, self).get_queryset()
+ # further filter queryset based on some set of criteria
+ return queryset.filter(pub_date__gte=date(2015, 1, 1))
+
+ def get_context_data(self, *args, **kwargs):
+ context = super(MySearchView, self).get_context_data(*args, **kwargs)
+ # do something
+ return context
+
+ # urls.py
+
+ urlpatterns = patterns('',
+ url(r'^/search/?$', MySearchView.as_view(), name='search_view'),
+ )
+
+
+Upgrading
+~~~~~~~~~
+
+Upgrading from basic usage of the old-style views to new-style views is usually as simple as:
+
+#. Create new views under ``views.py`` subclassing ``haystack.generic_views.SearchView``
+ or ``haystack.generic_views.FacetedSearchView``
+#. Move all parameters of your old-style views from your ``urls.py`` to attributes on
+ your new views. This will require renaming ``searchqueryset`` to ``queryset`` and
+ ``template`` to ``template_name``
+#. Review your templates and replace the ``page`` variable with ``page_obj``
+
+Here's an example::
+
+ ### old-style views...
+ # urls.py
+
+ sqs = SearchQuerySet().filter(author='john')
+
+ urlpatterns = patterns('haystack.views',
+ url(r'^$', SearchView(
+ template='my/special/path/john_search.html',
+ searchqueryset=sqs,
+ form_class=SearchForm
+ ), name='haystack_search'),
+ )
+
+ ### new-style views...
+ # views.py
+
+ class JohnSearchView(SearchView):
+ template_name = 'my/special/path/john_search.html'
+ queryset = SearchQuerySet().filter(author='john')
+ form_class = SearchForm
+
+ # urls.py
+ from myapp.views import JohnSearchView
+
+ urlpatterns = patterns('',
+ url(r'^$', JohnSearchView.as_view(), name='haystack_search'),
+ )
+
+
+If your views overrode methods on the old-style ``SearchView``, you will need to
+refactor those methods to their equivalents on Django's generic views. For
+example, if you previously used ``extra_context()`` to add additional template
+variables or preprocess the values returned by Haystack, that code would move
+to ``get_context_data()``, as sketched below the table.
+
++-----------------------+-------------------------------------------+
+| Old Method | New Method |
++=======================+===========================================+
+| ``extra_context()`` | `get_context_data()`_ |
++-----------------------+-------------------------------------------+
+| ``create_response()`` | `dispatch()`_ or ``get()`` and ``post()`` |
++-----------------------+-------------------------------------------+
+| ``get_query()`` | `get_queryset()`_ |
++-----------------------+-------------------------------------------+
+
+.. _get_context_data(): https://docs.djangoproject.com/en/1.7/ref/class-based-views/mixins-simple/#django.views.generic.base.ContextMixin.get_context_data
+.. _dispatch(): https://docs.djangoproject.com/en/1.7/ref/class-based-views/base/#django.views.generic.base.View.dispatch
+.. _get_queryset(): https://docs.djangoproject.com/en/1.7/ref/class-based-views/mixins-multiple-object/#django.views.generic.list.MultipleObjectMixin.get_queryset
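+
+As a sketch of that kind of move, assuming an old view that only added an extra
+``title`` variable (the variable itself is purely illustrative)::
+
+    # Old-style view...
+    def extra_context(self):
+        return {'title': 'John search'}
+
+    # ...becomes, on the new-style class-based view:
+    def get_context_data(self, **kwargs):
+        context = super(JohnSearchView, self).get_context_data(**kwargs)
+        context['title'] = 'John search'
+        return context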
+
+
+Old-Style Views
+---------------
+
+ .. deprecated:: 2.4.0
+
+Haystack comes bundled with three views: the class-based views (``SearchView`` &
+``FacetedSearchView``) and a traditional functional view (``basic_search``).
+
+The class-based views provide for easy extension should you need to alter the
+way a view works. Except in the case of faceting (again, see :doc:`faceting`),
+the ``SearchView`` works interchangeably with all other forms provided by
+Haystack.
+
+The functional view provides an example of how Haystack can be used in more
+traditional settings or as an example of how to write a more complex custom
+view. It is also thread-safe.
+
+``SearchView(template=None, load_all=True, form_class=None, searchqueryset=None, context_class=RequestContext, results_per_page=None)``
+---------------------------------------------------------------------------------------------------------------------------------------
+
+The ``SearchView`` is designed to be easy and flexible enough to override for
+common changes, while being internally abstracted so that altering only a
+specific portion of the code is straightforward.
+
+Without touching any of the internals of the ``SearchView``, you can modify
+which template is used, which form class should be instantiated to search with,
+what ``SearchQuerySet`` to use in the event you wish to pre-filter the results,
+what ``Context``-style object to use in the response and the ``load_all``
+performance optimization to reduce hits on the database. These options can (and
+generally should) be overridden at the URLconf level. For example, to have a
+custom search limited to the 'John' author, displaying all models to search by
+and specifying a custom template (``my/special/path/john_search.html``), your
+URLconf should look something like::
+
+ from django.conf.urls.defaults import *
+ from haystack.forms import ModelSearchForm
+ from haystack.query import SearchQuerySet
+ from haystack.views import SearchView
+
+ sqs = SearchQuerySet().filter(author='john')
+
+ # Without threading...
+ urlpatterns = patterns('haystack.views',
+ url(r'^$', SearchView(
+ template='my/special/path/john_search.html',
+ searchqueryset=sqs,
+            form_class=ModelSearchForm
+ ), name='haystack_search'),
+ )
+
+ # With threading...
+ from haystack.views import SearchView, search_view_factory
+
+ urlpatterns = patterns('haystack.views',
+ url(r'^$', search_view_factory(
+ view_class=SearchView,
+ template='my/special/path/john_search.html',
+ searchqueryset=sqs,
+ form_class=ModelSearchForm
+ ), name='haystack_search'),
+ )
+
+.. warning::
+
+ The standard ``SearchView`` is not thread-safe. Use the
+ ``search_view_factory`` function, which returns thread-safe instances of
+ ``SearchView``.
+
+By default, if you don't specify a ``form_class``, the view will use the
+``haystack.forms.ModelSearchForm`` form.
+
+Beyond these customizations, you can create your own ``SearchView`` and
+extend/override the following methods to change the functionality.
+
+``__call__(self, request)``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Generates the actual response to the search.
+
+Relies on internal, overridable methods to construct the response. You generally
+should avoid altering this method unless you need to change the flow of the
+methods or to add a new method into the processing.
+
+``build_form(self, form_kwargs=None)``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Instantiates the form the class should use to process the search query.
+
+Optionally accepts a dictionary of parameters that are passed on to the
+form's ``__init__``. You can use this to lightly customize the form.
+
+You should override this if you write a custom form that needs special
+parameters for instantiation.
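+
+For example, a sketch of handing the current user through to a custom form
+(the ``user`` keyword argument is an assumption; your form's ``__init__``
+would need to accept it)::
+
+    from haystack.views import SearchView
+
+    class UserSearchView(SearchView):
+        def build_form(self, form_kwargs=None):
+            form_kwargs = form_kwargs or {}
+            form_kwargs['user'] = self.request.user
+            return super(UserSearchView, self).build_form(form_kwargs)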
+
+``get_query(self)``
+~~~~~~~~~~~~~~~~~~~
+
+Returns the query provided by the user.
+
+Returns an empty string if the query is invalid. This pulls the cleaned query
+from the form, via the ``q`` field, for use elsewhere within the ``SearchView``.
+This is used to populate the ``query`` context variable.
+
+``get_results(self)``
+~~~~~~~~~~~~~~~~~~~~~
+
+Fetches the results via the form.
+
+Returns an empty list if there's no query to search with. This method relies on
+the form to do the heavy lifting as much as possible.
+
+``build_page(self)``
+~~~~~~~~~~~~~~~~~~~~
+
+Paginates the results appropriately.
+
+If you do not want to use Django's built-in pagination, it should be a
+simple matter to override this method to do what you would like.
+
+``extra_context(self)``
+~~~~~~~~~~~~~~~~~~~~~~~
+
+Allows the addition of more context variables as needed. Must return a
+dictionary whose contents will add to or overwrite the other variables in the
+context.
+
+``create_response(self)``
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Generates the actual HttpResponse to send back to the user. It builds the page,
+creates the context and renders the response for all the aforementioned
+processing.
+
+
+``basic_search(request, template='search/search.html', load_all=True, form_class=ModelSearchForm, searchqueryset=None, context_class=RequestContext, extra_context=None, results_per_page=None)``
+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+
+The ``basic_search`` view tries to provide most of the same functionality as the
+class-based views but resembles a more traditional generic view. It's both a
+working view, if you prefer not to use the class-based views, and a good
+starting point for writing highly custom views.
+
+Since it is all one function, the only means of extension are passing in
+kwargs, similar to the way generic views work.
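+
+For instance, a sketch of wiring it up with a couple of overridden kwargs (the
+URL name and the per-page count are assumptions for illustration)::
+
+    from django.conf.urls.defaults import *
+    from haystack.views import basic_search
+
+    urlpatterns = patterns('',
+        url(r'^search/$', basic_search, {
+            'results_per_page': 50,
+        }, name='haystack_basic_search'),
+    )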
+
+
+Creating Your Own View
+----------------------
+
+As with the forms, inheritance is likely your best bet. In this case, the
+``FacetedSearchView`` is a perfect example of how to extend the existing
+``SearchView``. The complete code for the ``FacetedSearchView`` looks like::
+
+ class FacetedSearchView(SearchView):
+ def extra_context(self):
+ extra = super(FacetedSearchView, self).extra_context()
+
+ if self.results == []:
+ extra['facets'] = self.form.search().facet_counts()
+ else:
+ extra['facets'] = self.results.facet_counts()
+
+ return extra
+
+It subclasses ``SearchView`` and adds the facets from the ``SearchQuerySet``
+to the context as the ``facets`` variable. As with the custom form example
+above, it relies on the parent class to handle most of the processing and
+extends it only where needed.
diff --git a/docs/who_uses.rst b/docs/who_uses.rst
new file mode 100644
index 0000000..9419213
--- /dev/null
+++ b/docs/who_uses.rst
@@ -0,0 +1,357 @@
+.. _ref-who-uses:
+
+Sites Using Haystack
+====================
+
+The following sites are a partial list of people using Haystack. I'm always
+interested in adding more sites, so please find me (``daniellindsley``) via
+IRC or the mailing list thread.
+
+
+LJWorld/Lawrence.com/KUSports
+-----------------------------
+
+For all things search-related.
+
+Using: Solr
+
+* http://www2.ljworld.com/search/
+* http://www2.ljworld.com/search/vertical/news.story/
+* http://www2.ljworld.com/marketplace/
+* http://www.lawrence.com/search/
+* http://www.kusports.com/search/
+
+
+AltWeeklies
+-----------
+
+Providing an API to story aggregation.
+
+Using: Whoosh
+
+* http://www.northcoastjournal.com/altweeklies/documentation/
+
+
+Trapeze
+-------
+
+Various projects.
+
+Using: Xapian
+
+* http://www.trapeze.com/
+* http://www.windmobile.ca/
+* http://www.bonefishgrill.com/
+* http://www.canadiantire.ca/ (Portions of)
+
+
+Vickerey.com
+------------
+
+For (really well done) search & faceting.
+
+Using: Solr
+
+* http://store.vickerey.com/products/search/
+
+
+Eldarion
+--------
+
+Various projects.
+
+Using: Solr
+
+* http://eldarion.com/
+
+
+Sunlight Labs
+-------------
+
+For general search.
+
+Using: Whoosh & Solr
+
+* http://sunlightlabs.com/
+* http://subsidyscope.com/
+
+
+NASA
+----
+
+For general search.
+
+Using: Solr
+
+* An internal site called SMD Spacebook 1.1.
+* http://science.nasa.gov/
+
+
+AllForLocal
+-----------
+
+For general search.
+
+* http://www.allforlocal.com/
+
+
+HUGE
+----
+
+Various projects.
+
+Using: Solr
+
+* http://hugeinc.com/
+* http://houselogic.com/
+
+
+Brick Design
+------------
+
+For search on Explore.
+
+Using: Solr
+
+* http://bricksf.com/
+* http://explore.org/
+
+
+Winding Road
+------------
+
+For general search.
+
+Using: Solr
+
+* http://www.windingroad.com/
+
+
+Reddit
+------
+
+For Reddit Gifts.
+
+Using: Whoosh
+
+* http://redditgifts.com/
+
+
+Pegasus News
+------------
+
+For general search.
+
+Using: Xapian
+
+* http://www.pegasusnews.com/
+
+
+Rampframe
+---------
+
+For general search.
+
+Using: Xapian
+
+* http://www.rampframe.com/
+
+
+Forkinit
+--------
+
+For general search, model-specific search and suggestions via MLT.
+
+Using: Solr
+
+* http://forkinit.com/
+
+
+Structured Abstraction
+----------------------
+
+For general search.
+
+Using: Xapian
+
+* http://www.structuredabstraction.com/
+* http://www.delivergood.org/
+
+
+CustomMade
+----------
+
+For general search.
+
+Using: Solr
+
+* http://www.custommade.com/
+
+
+University of the Andes, Dept. of Political Science
+---------------------------------------------------
+
+For general search & section-specific search. Developed by Monoku.
+
+Using: Solr
+
+* http://www.congresovisible.org/
+* http://www.monoku.com/
+
+
+Christchurch Art Gallery
+------------------------
+
+For general search & section-specific search.
+
+Using: Solr
+
+* http://christchurchartgallery.org.nz/search/
+* http://christchurchartgallery.org.nz/collection/browse/
+
+
+DevCheatSheet.com
+-----------------
+
+For general search.
+
+Using: Xapian
+
+* http://devcheatsheet.com/
+
+
+TodasLasRecetas
+---------------
+
+For search, faceting & More Like This.
+
+Using: Solr
+
+* http://www.todaslasrecetas.es/receta/s/?q=langostinos
+* http://www.todaslasrecetas.es/receta/9526/brochetas-de-langostinos
+
+
+AstroBin
+--------
+
+For general search.
+
+Using: Solr
+
+* http://www.astrobin.com/
+
+
+European Paper Company
+----------------------
+
+For general search.
+
+Using: ???
+
+* http://europeanpaper.com/
+
+
+mtn-op
+------
+
+For general search.
+
+Using: ???
+
+* http://mountain-op.com/
+
+
+Crate
+-----
+
+Crate is a PyPI mirror/replacement. It's using Haystack to power all search &
+faceted navigation on the site.
+
+Using: Elasticsearch
+
+* https://crate.io/
+
+
+Pix Populi
+----------
+
+Pix Populi is a popular French photo sharing site.
+
+Using: Solr
+
+* http://www.pix-populi.fr/
+
+
+LocalWiki
+----------
+
+LocalWiki is a tool for collaborating in local, geographic communities.
+It's using Haystack to power search on every LocalWiki instance.
+
+Using: Solr
+
+* http://localwiki.org/
+
+
+Pitchup
+-------
+
+For faceting, geo and autocomplete.
+
+Using: ???
+
+* http://www.pitchup.com/search/
+
+
+Gidsy
+-----
+
+Gidsy makes it easy for anyone to organize and find exciting things
+to do everywhere in the world.
+
+For activity search, area pages, forums and private messages.
+
+Using: Elasticsearch
+
+* https://gidsy.com/
+* https://gidsy.com/search/
+* https://gidsy.com/forum/
+
+
+GroundCity
+----------
+
+Groundcity is a Romanian dynamic real estate site.
+
+For real estate, forums and comments.
+
+Using: Whoosh
+
+* http://groundcity.ro/cautare/
+
+
+Docket Alarm
+------------
+
+Docket Alarm allows people to search court dockets across
+the country. With it, you can search court dockets in the International Trade
+Commission (ITC), the Patent Trial and Appeal Board (PTAB) and All Federal
+Courts.
+
+Using: Elasticsearch
+
+* https://www.docketalarm.com/search/ITC
+* https://www.docketalarm.com/search/PTAB
+* https://www.docketalarm.com/search/dockets
+
+
+Educreations
+-------------
+
+Educreations makes it easy for anyone to teach what they know and learn
+what they don't with a recordable whiteboard. Haystack is used to
+provide search across users and lessons.
+
+Using: Solr
+
+* http://www.educreations.com/browse/
diff --git a/haystack/__init__.py b/haystack/__init__.py
new file mode 100644
index 0000000..a02c845
--- /dev/null
+++ b/haystack/__init__.py
@@ -0,0 +1,71 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import logging
+
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+
+from haystack.constants import DEFAULT_ALIAS
+from haystack import signals
+from haystack.utils import loading
+
+
+__author__ = 'Daniel Lindsley'
+__version__ = (2, 4, 0)
+
+
+# Setup default logging.
+log = logging.getLogger('haystack')
+stream = logging.StreamHandler()
+stream.setLevel(logging.INFO)
+log.addHandler(stream)
+
+
+# Help people clean up from 1.X.
+if hasattr(settings, 'HAYSTACK_SITECONF'):
+ raise ImproperlyConfigured('The HAYSTACK_SITECONF setting is no longer used & can be removed.')
+if hasattr(settings, 'HAYSTACK_SEARCH_ENGINE'):
+ raise ImproperlyConfigured('The HAYSTACK_SEARCH_ENGINE setting has been replaced with HAYSTACK_CONNECTIONS.')
+if hasattr(settings, 'HAYSTACK_ENABLE_REGISTRATIONS'):
+ raise ImproperlyConfigured('The HAYSTACK_ENABLE_REGISTRATIONS setting is no longer used & can be removed.')
+if hasattr(settings, 'HAYSTACK_INCLUDE_SPELLING'):
+ raise ImproperlyConfigured('The HAYSTACK_INCLUDE_SPELLING setting is now a per-backend setting & belongs in HAYSTACK_CONNECTIONS.')
+
+
+# Check the 2.X+ bits.
+if not hasattr(settings, 'HAYSTACK_CONNECTIONS'):
+ raise ImproperlyConfigured('The HAYSTACK_CONNECTIONS setting is required.')
+if DEFAULT_ALIAS not in settings.HAYSTACK_CONNECTIONS:
+ raise ImproperlyConfigured("The default alias '%s' must be included in the HAYSTACK_CONNECTIONS setting." % DEFAULT_ALIAS)
+
+# Load the connections.
+connections = loading.ConnectionHandler(settings.HAYSTACK_CONNECTIONS)
+
+# Load the router(s).
+connection_router = loading.ConnectionRouter()
+
+if hasattr(settings, 'HAYSTACK_ROUTERS'):
+ if not isinstance(settings.HAYSTACK_ROUTERS, (list, tuple)):
+ raise ImproperlyConfigured("The HAYSTACK_ROUTERS setting must be either a list or tuple.")
+
+ connection_router = loading.ConnectionRouter(settings.HAYSTACK_ROUTERS)
+
+# Setup the signal processor.
+signal_processor_path = getattr(settings, 'HAYSTACK_SIGNAL_PROCESSOR', 'haystack.signals.BaseSignalProcessor')
+signal_processor_class = loading.import_class(signal_processor_path)
+signal_processor = signal_processor_class(connections, connection_router)
+
+
+# Per-request, reset the ghetto query log.
+# Probably not extraordinarily thread-safe but should only matter when
+# DEBUG = True.
+def reset_search_queries(**kwargs):
+ for conn in connections.all():
+ conn.reset_queries()
+
+
+if settings.DEBUG:
+ from django.core import signals as django_signals
+ django_signals.request_started.connect(reset_search_queries)
diff --git a/haystack/admin.py b/haystack/admin.py
new file mode 100644
index 0000000..806991a
--- /dev/null
+++ b/haystack/admin.py
@@ -0,0 +1,163 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django import template
+from django.contrib.admin.options import csrf_protect_m, ModelAdmin
+from django.contrib.admin.views.main import ChangeList, SEARCH_VAR
+from django.core.exceptions import PermissionDenied
+from django.core.paginator import InvalidPage, Paginator
+from django.shortcuts import render_to_response
+from django.utils.translation import ungettext
+
+from haystack import connections
+from haystack.query import SearchQuerySet
+from haystack.utils import get_model_ct_tuple
+
+try:
+ from django.utils.encoding import force_text
+except ImportError:
+ from django.utils.encoding import force_unicode as force_text
+
+
+def list_max_show_all(changelist):
+ """
+ Returns the maximum amount of results a changelist can have for the
+ "Show all" link to be displayed in a manner compatible with both Django
+ 1.4 and 1.3. See Django ticket #15997 for details.
+ """
+ try:
+ # This import is available in Django 1.3 and below
+ from django.contrib.admin.views.main import MAX_SHOW_ALL_ALLOWED
+ return MAX_SHOW_ALL_ALLOWED
+ except ImportError:
+ return changelist.list_max_show_all
+
+
+class SearchChangeList(ChangeList):
+ def __init__(self, **kwargs):
+ self.haystack_connection = kwargs.pop('haystack_connection', 'default')
+ super(SearchChangeList, self).__init__(**kwargs)
+
+ def get_results(self, request):
+ if not SEARCH_VAR in request.GET:
+ return super(SearchChangeList, self).get_results(request)
+
+ # Note that pagination is 0-based, not 1-based.
+ sqs = SearchQuerySet(self.haystack_connection).models(self.model).auto_query(request.GET[SEARCH_VAR]).load_all()
+
+ paginator = Paginator(sqs, self.list_per_page)
+ # Get the number of objects, with admin filters applied.
+ result_count = paginator.count
+ full_result_count = SearchQuerySet(self.haystack_connection).models(self.model).all().count()
+
+ can_show_all = result_count <= list_max_show_all(self)
+ multi_page = result_count > self.list_per_page
+
+ # Get the list of objects to display on this page.
+ try:
+ result_list = paginator.page(self.page_num + 1).object_list
+ # Grab just the Django models, since that's what everything else is
+ # expecting.
+ result_list = [result.object for result in result_list]
+ except InvalidPage:
+ result_list = ()
+
+ self.result_count = result_count
+ self.full_result_count = full_result_count
+ self.result_list = result_list
+ self.can_show_all = can_show_all
+ self.multi_page = multi_page
+ self.paginator = paginator
+
+
+class SearchModelAdminMixin(object):
+ # haystack connection to use for searching
+ haystack_connection = 'default'
+
+ @csrf_protect_m
+ def changelist_view(self, request, extra_context=None):
+ if not self.has_change_permission(request, None):
+ raise PermissionDenied
+
+ if not SEARCH_VAR in request.GET:
+ # Do the usual song and dance.
+ return super(SearchModelAdminMixin, self).changelist_view(request, extra_context)
+
+ # Do a search of just this model and populate a Changelist with the
+ # returned bits.
+ if not self.model in connections[self.haystack_connection].get_unified_index().get_indexed_models():
+ # Oops. That model isn't being indexed. Return the usual
+ # behavior instead.
+ return super(SearchModelAdminMixin, self).changelist_view(request, extra_context)
+
+ # So. Much. Boilerplate.
+ # Why copy-paste a few lines when you can copy-paste TONS of lines?
+ list_display = list(self.list_display)
+
+ kwargs = {
+ 'haystack_connection': self.haystack_connection,
+ 'request': request,
+ 'model': self.model,
+ 'list_display': list_display,
+ 'list_display_links': self.list_display_links,
+ 'list_filter': self.list_filter,
+ 'date_hierarchy': self.date_hierarchy,
+ 'search_fields': self.search_fields,
+ 'list_select_related': self.list_select_related,
+ 'list_per_page': self.list_per_page,
+ 'list_editable': self.list_editable,
+ 'model_admin': self
+ }
+
+ # Django 1.4 compatibility.
+ if hasattr(self, 'list_max_show_all'):
+ kwargs['list_max_show_all'] = self.list_max_show_all
+
+ changelist = SearchChangeList(**kwargs)
+ formset = changelist.formset = None
+ media = self.media
+
+ # Build the action form and populate it with available actions.
+ # Check actions to see if any are available on this changelist
+ actions = self.get_actions(request)
+ if actions:
+ action_form = self.action_form(auto_id=None)
+ action_form.fields['action'].choices = self.get_action_choices(request)
+ else:
+ action_form = None
+
+ selection_note = ungettext('0 of %(count)d selected',
+ 'of %(count)d selected', len(changelist.result_list))
+ selection_note_all = ungettext('%(total_count)s selected',
+ 'All %(total_count)s selected', changelist.result_count)
+
+ context = {
+ 'module_name': force_text(self.model._meta.verbose_name_plural),
+ 'selection_note': selection_note % {'count': len(changelist.result_list)},
+ 'selection_note_all': selection_note_all % {'total_count': changelist.result_count},
+ 'title': changelist.title,
+ 'is_popup': changelist.is_popup,
+ 'cl': changelist,
+ 'media': media,
+ 'has_add_permission': self.has_add_permission(request),
+ # More Django 1.4 compatibility
+ 'root_path': getattr(self.admin_site, 'root_path', None),
+ 'app_label': self.model._meta.app_label,
+ 'action_form': action_form,
+ 'actions_on_top': self.actions_on_top,
+ 'actions_on_bottom': self.actions_on_bottom,
+ 'actions_selection_counter': getattr(self, 'actions_selection_counter', 0),
+ }
+ context.update(extra_context or {})
+ context_instance = template.RequestContext(request, current_app=self.admin_site.name)
+ app_name, model_name = get_model_ct_tuple(self.model)
+ return render_to_response(self.change_list_template or [
+ 'admin/%s/%s/change_list.html' % (app_name, model_name),
+ 'admin/%s/change_list.html' % app_name,
+ 'admin/change_list.html'
+ ], context, context_instance=context_instance)
+
+
+class SearchModelAdmin(SearchModelAdminMixin, ModelAdmin):
+ pass
diff --git a/haystack/backends/__init__.py b/haystack/backends/__init__.py
new file mode 100644
index 0000000..bcf8554
--- /dev/null
+++ b/haystack/backends/__init__.py
@@ -0,0 +1,1041 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import copy
+from copy import deepcopy
+from time import time
+from django.conf import settings
+from django.db.models import Q
+from django.db.models.base import ModelBase
+from django.utils import six
+from django.utils import tree
+from haystack.constants import VALID_FILTERS, FILTER_SEPARATOR, DEFAULT_ALIAS
+from haystack.exceptions import MoreLikeThisError, FacetingError
+from haystack.models import SearchResult
+from haystack.utils.loading import UnifiedIndex
+from haystack.utils import get_model_ct
+
+try:
+ from django.utils.encoding import force_text
+except ImportError:
+ from django.utils.encoding import force_unicode as force_text
+
+
+VALID_GAPS = ['year', 'month', 'day', 'hour', 'minute', 'second']
+
+
+def log_query(func):
+ """
+ A decorator for pseudo-logging search queries. Used in the ``SearchBackend``
+ to wrap the ``search`` method.
+ """
+ def wrapper(obj, query_string, *args, **kwargs):
+ start = time()
+
+ try:
+ return func(obj, query_string, *args, **kwargs)
+ finally:
+ stop = time()
+
+ if settings.DEBUG:
+ from haystack import connections
+ connections[obj.connection_alias].queries.append({
+ 'query_string': query_string,
+ 'additional_args': args,
+ 'additional_kwargs': kwargs,
+ 'time': "%.3f" % (stop - start),
+ 'start': start,
+ 'stop': stop,
+ })
+
+ return wrapper
+
+
+class EmptyResults(object):
+ hits = 0
+ docs = []
+
+ def __len__(self):
+ return 0
+
+ def __getitem__(self, k):
+ if isinstance(k, slice):
+ return []
+ else:
+ raise IndexError("It's not here.")
+
+
+class BaseSearchBackend(object):
+ """
+ Abstract search engine base class.
+ """
+ # Backends should include their own reserved words/characters.
+ RESERVED_WORDS = []
+ RESERVED_CHARACTERS = []
+
+ def __init__(self, connection_alias, **connection_options):
+ self.connection_alias = connection_alias
+ self.timeout = connection_options.get('TIMEOUT', 10)
+ self.include_spelling = connection_options.get('INCLUDE_SPELLING', False)
+ self.batch_size = connection_options.get('BATCH_SIZE', 1000)
+ self.silently_fail = connection_options.get('SILENTLY_FAIL', True)
+ self.distance_available = connection_options.get('DISTANCE_AVAILABLE', False)
+
+ def update(self, index, iterable):
+ """
+ Updates the backend when given a SearchIndex and a collection of
+ documents.
+
+ This method MUST be implemented by each backend, as it will be highly
+ specific to each one.
+ """
+ raise NotImplementedError
+
+ def remove(self, obj_or_string):
+ """
+ Removes a document/object from the backend. Can be either a model
+ instance or the identifier (i.e. ``app_name.model_name.id``) in the
+ event the object no longer exists.
+
+ This method MUST be implemented by each backend, as it will be highly
+ specific to each one.
+ """
+ raise NotImplementedError
+
+ def clear(self, models=[], commit=True):
+ """
+ Clears the backend of all documents/objects for a collection of models.
+
+ This method MUST be implemented by each backend, as it will be highly
+ specific to each one.
+ """
+ raise NotImplementedError
+
+ @log_query
+ def search(self, query_string, **kwargs):
+ """
+ Takes a query to search on and returns dictionary.
+
+ The query should be a string that is appropriate syntax for the backend.
+
+ The returned dictionary should contain the keys 'results' and 'hits'.
+ The 'results' value should be an iterable of populated SearchResult
+ objects. The 'hits' should be an integer count of the number of matched
+ results the search backend found.
+
+ This method MUST be implemented by each backend, as it will be highly
+ specific to each one.
+ """
+ raise NotImplementedError
+
+ def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+ fields='', highlight=False, facets=None,
+ date_facets=None, query_facets=None,
+ narrow_queries=None, spelling_query=None,
+ within=None, dwithin=None, distance_point=None,
+ models=None, limit_to_registered_models=None,
+ result_class=None):
+ # A convenience method most backends should include in order to make
+ # extension easier.
+ raise NotImplementedError
+
+ def prep_value(self, value):
+ """
+ Hook to give the backend a chance to prep an attribute value before
+ sending it to the search engine. By default, just force it to unicode.
+ """
+ return force_text(value)
+
+ def more_like_this(self, model_instance, additional_query_string=None, result_class=None):
+ """
+ Takes a model object and returns results the backend thinks are similar.
+
+ This method MUST be implemented by each backend, as it will be highly
+ specific to each one.
+ """
+        raise NotImplementedError("Subclasses must provide a way to fetch similar records via the 'more_like_this' method if supported by the backend.")
+
+ def extract_file_contents(self, file_obj):
+ """
+        Hook to allow backends which support rich-content extraction (e.g. PDF
+        or Word) to process the provided file object and return the contents
+        for indexing.
+
+ Returns None if metadata cannot be extracted; otherwise returns a
+ dictionary containing at least two keys:
+
+ :contents:
+ Extracted full-text content, if applicable
+ :metadata:
+ key:value pairs of text strings
+ """
+
+ raise NotImplementedError("Subclasses must provide a way to extract metadata via the 'extract' method if supported by the backend.")
+
+ def build_schema(self, fields):
+ """
+ Takes a dictionary of fields and returns schema information.
+
+ This method MUST be implemented by each backend, as it will be highly
+ specific to each one.
+ """
+ raise NotImplementedError("Subclasses must provide a way to build their schema.")
+
+ def build_models_list(self):
+ """
+ Builds a list of models for searching.
+
+ The ``search`` method should use this and the ``django_ct`` field to
+ narrow the results (unless the user indicates not to). This helps ignore
+ any results that are not currently handled models and ensures
+ consistent caching.
+ """
+ from haystack import connections
+ models = []
+
+ for model in connections[self.connection_alias].get_unified_index().get_indexed_models():
+ models.append(get_model_ct(model))
+
+ return models
+
+
+# Alias for easy loading within SearchQuery objects.
+SearchBackend = BaseSearchBackend
+
+
+class SearchNode(tree.Node):
+ """
+ Manages an individual condition within a query.
+
+ Most often, this will be a lookup to ensure that a certain word or phrase
+ appears in the documents being indexed. However, it also supports filtering
+ types (such as 'lt', 'gt', 'in' and others) for more complex lookups.
+
+ This object creates a tree, with children being a list of either more
+ ``SQ`` objects or the expressions/values themselves.
+ """
+ AND = 'AND'
+ OR = 'OR'
+ default = AND
+
+ # Start compat. Django 1.6 changed how ``tree.Node`` works, so we're going
+ # to patch back in the original implementation until time to rewrite this
+ # presents itself.
+ # See https://github.com/django/django/commit/d3f00bd.
+
+ def __init__(self, children=None, connector=None, negated=False):
+ """
+ Constructs a new Node. If no connector is given, the default will be
+ used.
+
+ Warning: You probably don't want to pass in the 'negated' parameter. It
+ is NOT the same as constructing a node and calling negate() on the
+ result.
+ """
+ self.children = children and children[:] or []
+ self.connector = connector or self.default
+ self.subtree_parents = []
+ self.negated = negated
+
+ # We need this because of django.db.models.query_utils.Q. Q. __init__() is
+ # problematic, but it is a natural Node subclass in all other respects.
+ def _new_instance(cls, children=None, connector=None, negated=False):
+ """
+ This is called to create a new instance of this class when we need new
+ Nodes (or subclasses) in the internal code in this class. Normally, it
+ just shadows __init__(). However, subclasses with an __init__ signature
+ that is not an extension of Node.__init__ might need to implement this
+ method to allow a Node to create a new instance of them (if they have
+ any extra setting up to do).
+ """
+ obj = SearchNode(children, connector, negated)
+ obj.__class__ = cls
+ return obj
+ _new_instance = classmethod(_new_instance)
+
+ def __str__(self):
+ if self.negated:
+ return '(NOT (%s: %s))' % (self.connector, ', '.join([str(c) for c
+ in self.children]))
+ return '(%s: %s)' % (self.connector, ', '.join([str(c) for c in
+ self.children]))
+
+ def __deepcopy__(self, memodict):
+ """
+ Utility method used by copy.deepcopy().
+ """
+ obj = SearchNode(connector=self.connector, negated=self.negated)
+ obj.__class__ = self.__class__
+ obj.children = copy.deepcopy(self.children, memodict)
+ obj.subtree_parents = copy.deepcopy(self.subtree_parents, memodict)
+ return obj
+
+ def __len__(self):
+ """
+        The size of a node is the number of children it has.
+ """
+ return len(self.children)
+
+ def __bool__(self):
+ """
+ For truth value testing.
+ """
+ return bool(self.children)
+
+ def __nonzero__(self): # Python 2 compatibility
+ return type(self).__bool__(self)
+
+ def __contains__(self, other):
+ """
+        Returns True if 'other' is a direct child of this instance.
+ """
+ return other in self.children
+
+ def add(self, node, conn_type):
+ """
+ Adds a new node to the tree. If the conn_type is the same as the root's
+ current connector type, the node is added to the first level.
+ Otherwise, the whole tree is pushed down one level and a new root
+ connector is created, connecting the existing tree and the new node.
+ """
+ if node in self.children and conn_type == self.connector:
+ return
+ if len(self.children) < 2:
+ self.connector = conn_type
+ if self.connector == conn_type:
+ if isinstance(node, SearchNode) and (node.connector == conn_type or
+ len(node) == 1):
+ self.children.extend(node.children)
+ else:
+ self.children.append(node)
+ else:
+ obj = self._new_instance(self.children, self.connector,
+ self.negated)
+ self.connector = conn_type
+ self.children = [obj, node]
+
+ def negate(self):
+ """
+ Negate the sense of the root connector. This reorganises the children
+ so that the current node has a single child: a negated node containing
+ all the previous children. This slightly odd construction makes adding
+ new children behave more intuitively.
+
+ Interpreting the meaning of this negate is up to client code. This
+ method is useful for implementing "not" arrangements.
+ """
+ self.children = [self._new_instance(self.children, self.connector,
+ not self.negated)]
+ self.connector = self.default
+
+ def start_subtree(self, conn_type):
+ """
+ Sets up internal state so that new nodes are added to a subtree of the
+ current node. The conn_type specifies how the sub-tree is joined to the
+ existing children.
+ """
+ if len(self.children) == 1:
+ self.connector = conn_type
+ elif self.connector != conn_type:
+ self.children = [self._new_instance(self.children, self.connector,
+ self.negated)]
+ self.connector = conn_type
+ self.negated = False
+
+ self.subtree_parents.append(self.__class__(self.children,
+ self.connector, self.negated))
+ self.connector = self.default
+ self.negated = False
+ self.children = []
+
+ def end_subtree(self):
+ """
+ Closes off the most recently unmatched start_subtree() call.
+
+ This puts the current state into a node of the parent tree and returns
+ the current instances state to be the parent.
+ """
+ obj = self.subtree_parents.pop()
+ node = self.__class__(self.children, self.connector)
+ self.connector = obj.connector
+ self.negated = obj.negated
+ self.children = obj.children
+ self.children.append(node)
+
+ # End compat.
+
+ def __repr__(self):
+ return '<SQ: %s %s>' % (self.connector, self.as_query_string(self._repr_query_fragment_callback))
+
+ def _repr_query_fragment_callback(self, field, filter_type, value):
+ if six.PY3:
+ value = force_text(value)
+ else:
+ value = force_text(value).encode('utf8')
+
+ return "%s%s%s=%s" % (field, FILTER_SEPARATOR, filter_type, value)
+
+ def as_query_string(self, query_fragment_callback):
+ """
+ Produces a portion of the search query from the current SQ and its
+ children.
+ """
+ result = []
+
+ for child in self.children:
+ if hasattr(child, 'as_query_string'):
+ result.append(child.as_query_string(query_fragment_callback))
+ else:
+ expression, value = child
+ field, filter_type = self.split_expression(expression)
+ result.append(query_fragment_callback(field, filter_type, value))
+
+ conn = ' %s ' % self.connector
+ query_string = conn.join(result)
+
+ if query_string:
+ if self.negated:
+ query_string = 'NOT (%s)' % query_string
+ elif len(self.children) != 1:
+ query_string = '(%s)' % query_string
+
+ return query_string
+
+ def split_expression(self, expression):
+ """Parses an expression and determines the field and filter type."""
+ parts = expression.split(FILTER_SEPARATOR)
+ field = parts[0]
+
+ if len(parts) == 1 or parts[-1] not in VALID_FILTERS:
+ filter_type = 'contains'
+ else:
+ filter_type = parts.pop()
+
+ return (field, filter_type)
+
+
+class SQ(Q, SearchNode):
+ """
+ Manages an individual condition within a query.
+
+ Most often, this will be a lookup to ensure that a certain word or phrase
+ appears in the documents being indexed. However, it also supports filtering
+ types (such as 'lt', 'gt', 'in' and others) for more complex lookups.
+ """
+ pass
+
+
+class BaseSearchQuery(object):
+ """
+ A base class for handling the query itself.
+
+ This class acts as an intermediary between the ``SearchQuerySet`` and the
+ ``SearchBackend`` itself.
+
+ The ``SearchQuery`` object maintains a tree of ``SQ`` objects. Each ``SQ``
+ object supports what field it looks up against, what kind of lookup (i.e.
+    the __'s), what value it's looking for, if it's an AND/OR/NOT and tracks
+ any children it may have. The ``SearchQuery.build_query`` method starts with
+ the root of the tree, building part of the final query at each node until
+ the full final query is ready for the ``SearchBackend``.
+
+ Backends should extend this class and provide implementations for
+ ``build_query_fragment``, ``clean`` and ``run``. See the ``solr`` backend for an example
+ implementation.
+ """
+
+ def __init__(self, using=DEFAULT_ALIAS):
+ self.query_filter = SearchNode()
+ self.order_by = []
+ self.models = set()
+ self.boost = {}
+ self.start_offset = 0
+ self.end_offset = None
+ self.highlight = False
+ self.facets = {}
+ self.date_facets = {}
+ self.query_facets = []
+ self.narrow_queries = set()
+ #: If defined, fields should be a list of field names - no other values
+ #: will be retrieved so the caller must be careful to include django_ct
+ #: and django_id when using code which expects those to be included in
+ #: the results
+ self.fields = []
+ # Geospatial-related information
+ self.within = {}
+ self.dwithin = {}
+ self.distance_point = {}
+ # Internal.
+ self._raw_query = None
+ self._raw_query_params = {}
+ self._more_like_this = False
+ self._mlt_instance = None
+ self._results = None
+ self._hit_count = None
+ self._facet_counts = None
+ self._stats = None
+ self._spelling_suggestion = None
+ self.result_class = SearchResult
+ self.stats = {}
+ from haystack import connections
+ self._using = using
+ self.backend = connections[self._using].get_backend()
+
+ def __str__(self):
+ return self.build_query()
+
+ def __getstate__(self):
+ """For pickling."""
+ obj_dict = self.__dict__.copy()
+ del(obj_dict['backend'])
+ return obj_dict
+
+ def __setstate__(self, obj_dict):
+ """For unpickling."""
+ from haystack import connections
+ self.__dict__.update(obj_dict)
+ self.backend = connections[self._using].get_backend()
+
+ def has_run(self):
+        """Indicates if any query has been run."""
+ return None not in (self._results, self._hit_count)
+
+ def build_params(self, spelling_query=None):
+ """Generates a list of params to use when searching."""
+ kwargs = {
+ 'start_offset': self.start_offset,
+ }
+
+ if self.order_by:
+ kwargs['sort_by'] = self.order_by
+
+ if self.end_offset is not None:
+ kwargs['end_offset'] = self.end_offset
+
+ if self.highlight:
+ kwargs['highlight'] = self.highlight
+
+ if self.facets:
+ kwargs['facets'] = self.facets
+
+ if self.date_facets:
+ kwargs['date_facets'] = self.date_facets
+
+ if self.query_facets:
+ kwargs['query_facets'] = self.query_facets
+
+ if self.narrow_queries:
+ kwargs['narrow_queries'] = self.narrow_queries
+
+ if spelling_query:
+ kwargs['spelling_query'] = spelling_query
+
+ if self.boost:
+ kwargs['boost'] = self.boost
+
+ if self.within:
+ kwargs['within'] = self.within
+
+ if self.dwithin:
+ kwargs['dwithin'] = self.dwithin
+
+ if self.distance_point:
+ kwargs['distance_point'] = self.distance_point
+
+ if self.result_class:
+ kwargs['result_class'] = self.result_class
+
+ if self.fields:
+ kwargs['fields'] = self.fields
+
+ if self.models:
+ kwargs['models'] = self.models
+
+ return kwargs
+
+ def run(self, spelling_query=None, **kwargs):
+ """Builds and executes the query. Returns a list of search results."""
+ final_query = self.build_query()
+ search_kwargs = self.build_params(spelling_query=spelling_query)
+
+ if kwargs:
+ search_kwargs.update(kwargs)
+
+ results = self.backend.search(final_query, **search_kwargs)
+ self._results = results.get('results', [])
+ self._hit_count = results.get('hits', 0)
+ self._facet_counts = self.post_process_facets(results)
+ self._spelling_suggestion = results.get('spelling_suggestion', None)
+
+ def run_mlt(self, **kwargs):
+ """
+ Executes the More Like This. Returns a list of search results similar
+ to the provided document (and optionally query).
+ """
+ if self._more_like_this is False or self._mlt_instance is None:
+ raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.")
+
+ search_kwargs = {
+ 'result_class': self.result_class,
+ }
+
+ if self.models:
+ search_kwargs['models'] = self.models
+
+ if kwargs:
+ search_kwargs.update(kwargs)
+
+ additional_query_string = self.build_query()
+ results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs)
+ self._results = results.get('results', [])
+ self._hit_count = results.get('hits', 0)
+
+ def run_raw(self, **kwargs):
+ """Executes a raw query. Returns a list of search results."""
+ search_kwargs = self.build_params()
+ search_kwargs.update(self._raw_query_params)
+
+ if kwargs:
+ search_kwargs.update(kwargs)
+
+ results = self.backend.search(self._raw_query, **search_kwargs)
+ self._results = results.get('results', [])
+ self._hit_count = results.get('hits', 0)
+ self._facet_counts = results.get('facets', {})
+ self._spelling_suggestion = results.get('spelling_suggestion', None)
+
+ def get_count(self):
+ """
+ Returns the number of results the backend found for the query.
+
+ If the query has not been run, this will execute the query and store
+ the results.
+ """
+ if self._hit_count is None:
+ # Limit the slice to 1 so we get a count without consuming
+ # everything.
+ if not self.end_offset:
+ self.end_offset = 1
+
+ if self._more_like_this:
+ # Special case for MLT.
+ self.run_mlt()
+ elif self._raw_query:
+ # Special case for raw queries.
+ self.run_raw()
+ else:
+ self.run()
+
+ return self._hit_count
+
+ def get_results(self, **kwargs):
+ """
+ Returns the results received from the backend.
+
+ If the query has not been run, this will execute the query and store
+ the results.
+ """
+ if self._results is None:
+ if self._more_like_this:
+ # Special case for MLT.
+ self.run_mlt(**kwargs)
+ elif self._raw_query:
+ # Special case for raw queries.
+ self.run_raw(**kwargs)
+ else:
+ self.run(**kwargs)
+
+ return self._results
+
+ def get_facet_counts(self):
+ """
+ Returns the facet counts received from the backend.
+
+ If the query has not been run, this will execute the query and store
+ the results.
+ """
+ if self._facet_counts is None:
+ self.run()
+
+ return self._facet_counts
+
+ def get_stats(self):
+ """
+ Returns the stats received from the backend.
+
+ If the query has not been run, this will execute the query and store
+        the results.
+ """
+ if self._stats is None:
+ self.run()
+ return self._stats
+
+ def get_spelling_suggestion(self, preferred_query=None):
+ """
+ Returns the spelling suggestion received from the backend.
+
+ If the query has not been run, this will execute the query and store
+ the results.
+ """
+ if self._spelling_suggestion is None:
+ self.run(spelling_query=preferred_query)
+
+ return self._spelling_suggestion
+
+ def boost_fragment(self, boost_word, boost_value):
+ """Generates query fragment for boosting a single word/value pair."""
+ return "%s^%s" % (boost_word, boost_value)
+
+ def matching_all_fragment(self):
+ """Generates the query that matches all documents."""
+ return '*'
+
+ def build_query(self):
+ """
+ Interprets the collected query metadata and builds the final query to
+ be sent to the backend.
+ """
+ final_query = self.query_filter.as_query_string(self.build_query_fragment)
+
+ if not final_query:
+ # Match all.
+ final_query = self.matching_all_fragment()
+
+ if self.boost:
+ boost_list = []
+
+ for boost_word, boost_value in self.boost.items():
+ boost_list.append(self.boost_fragment(boost_word, boost_value))
+
+ final_query = "%s %s" % (final_query, " ".join(boost_list))
+
+ return final_query
+
+ def combine(self, rhs, connector=SQ.AND):
+ if connector == SQ.AND:
+ self.add_filter(rhs.query_filter)
+ elif connector == SQ.OR:
+ self.add_filter(rhs.query_filter, use_or=True)
+
+ # Methods for backends to implement.
+
+ def build_query_fragment(self, field, filter_type, value):
+ """
+ Generates a query fragment from a field, filter type and a value.
+
+ Must be implemented in backends as this will be highly backend specific.
+ """
+ raise NotImplementedError("Subclasses must provide a way to generate query fragments via the 'build_query_fragment' method.")
+
+
+ # Standard methods to alter the query.
+
+ def clean(self, query_fragment):
+ """
+ Provides a mechanism for sanitizing user input before presenting the
+ value to the backend.
+
+ A basic (override-able) implementation is provided.
+ """
+ if not isinstance(query_fragment, six.string_types):
+ return query_fragment
+
+ words = query_fragment.split()
+ cleaned_words = []
+
+ for word in words:
+ if word in self.backend.RESERVED_WORDS:
+ word = word.replace(word, word.lower())
+
+ for char in self.backend.RESERVED_CHARACTERS:
+ word = word.replace(char, '\\%s' % char)
+
+ cleaned_words.append(word)
+
+ return ' '.join(cleaned_words)
+
+ def build_not_query(self, query_string):
+ if ' ' in query_string:
+ query_string = "(%s)" % query_string
+
+ return u"NOT %s" % query_string
+
+ def build_exact_query(self, query_string):
+ return u'"%s"' % query_string
+
+ def add_filter(self, query_filter, use_or=False):
+ """
+ Adds a SQ to the current query.
+ """
+ if use_or:
+ connector = SQ.OR
+ else:
+ connector = SQ.AND
+
+ if self.query_filter and query_filter.connector != connector and len(query_filter) > 1:
+ self.query_filter.start_subtree(connector)
+ subtree = True
+ else:
+ subtree = False
+
+ for child in query_filter.children:
+ if isinstance(child, tree.Node):
+ self.query_filter.start_subtree(connector)
+ self.add_filter(child)
+ self.query_filter.end_subtree()
+ else:
+ expression, value = child
+ self.query_filter.add((expression, value), connector)
+
+ connector = query_filter.connector
+
+ if query_filter.negated:
+ self.query_filter.negate()
+
+ if subtree:
+ self.query_filter.end_subtree()
+
+ def add_order_by(self, field):
+ """Orders the search result by a field."""
+ self.order_by.append(field)
+
+ def clear_order_by(self):
+ """
+ Clears out all ordering that has been already added, reverting the
+ query to relevancy.
+ """
+ self.order_by = []
+
+ def add_model(self, model):
+ """
+ Restricts the query requiring matches in the given model.
+
+ This builds upon previous additions, so you can limit to multiple models
+ by chaining this method several times.
+ """
+ if not isinstance(model, ModelBase):
+ raise AttributeError('The model being added to the query must derive from Model.')
+
+ self.models.add(model)
+
+ def set_limits(self, low=None, high=None):
+ """Restricts the query by altering either the start, end or both offsets."""
+ if low is not None:
+ self.start_offset = int(low)
+
+ if high is not None:
+ self.end_offset = int(high)
+
+ def clear_limits(self):
+ """Clears any existing limits."""
+ self.start_offset, self.end_offset = 0, None
+
+ def add_boost(self, term, boost_value):
+ """Adds a boosted term and the amount to boost it to the query."""
+ self.boost[term] = boost_value
+
+ def raw_search(self, query_string, **kwargs):
+ """
+ Runs a raw query (no parsing) against the backend.
+
+ This method causes the SearchQuery to ignore the standard query
+ generating facilities, running only what was provided instead.
+
+ Note that any kwargs passed along will override anything provided
+ to the rest of the ``SearchQuerySet``.
+ """
+ self._raw_query = query_string
+ self._raw_query_params = kwargs
+
+ def more_like_this(self, model_instance):
+ """
+ Allows backends with support for "More Like This" to return results
+ similar to the provided instance.
+ """
+ self._more_like_this = True
+ self._mlt_instance = model_instance
+
+ def add_stats_query(self,stats_field,stats_facets):
+ """Adds stats and stats_facets queries for the Solr backend."""
+ self.stats[stats_field] = stats_facets
+
+ def add_highlight(self):
+ """Adds highlighting to the search results."""
+ self.highlight = True
+
+ def add_within(self, field, point_1, point_2):
+ """Adds bounding box parameters to search query."""
+ from haystack.utils.geo import ensure_point
+ self.within = {
+ 'field': field,
+ 'point_1': ensure_point(point_1),
+ 'point_2': ensure_point(point_2),
+ }
+
+ def add_dwithin(self, field, point, distance):
+ """Adds radius-based parameters to search query."""
+ from haystack.utils.geo import ensure_point, ensure_distance
+ self.dwithin = {
+ 'field': field,
+ 'point': ensure_point(point),
+ 'distance': ensure_distance(distance),
+ }
+
+ def add_distance(self, field, point):
+ """
+ Denotes that results should include distance measurements from the
+ point passed in.
+ """
+ from haystack.utils.geo import ensure_point
+ self.distance_point = {
+ 'field': field,
+ 'point': ensure_point(point),
+ }
+
+ def add_field_facet(self, field, **options):
+ """Adds a regular facet on a field."""
+ from haystack import connections
+ field_name = connections[self._using].get_unified_index().get_facet_fieldname(field)
+ self.facets[field_name] = options.copy()
+
+ def add_date_facet(self, field, start_date, end_date, gap_by, gap_amount=1):
+ """Adds a date-based facet on a field."""
+ from haystack import connections
+ if not gap_by in VALID_GAPS:
+ raise FacetingError("The gap_by ('%s') must be one of the following: %s." % (gap_by, ', '.join(VALID_GAPS)))
+
+ details = {
+ 'start_date': start_date,
+ 'end_date': end_date,
+ 'gap_by': gap_by,
+ 'gap_amount': gap_amount,
+ }
+ self.date_facets[connections[self._using].get_unified_index().get_facet_fieldname(field)] = details
+
+ def add_query_facet(self, field, query):
+ """Adds a query facet on a field."""
+ from haystack import connections
+ self.query_facets.append((connections[self._using].get_unified_index().get_facet_fieldname(field), query))
+
+ def add_narrow_query(self, query):
+ """
+ Narrows a search to a subset of all documents per the query.
+
+ Generally used in conjunction with faceting.
+ """
+ self.narrow_queries.add(query)
+
+ def set_result_class(self, klass):
+ """
+ Sets the result class to use for results.
+
+ Overrides any previous usages. If ``None`` is provided, Haystack will
+ revert back to the default ``SearchResult`` object.
+ """
+ if klass is None:
+ klass = SearchResult
+
+ self.result_class = klass
+
+ def post_process_facets(self, results):
+ # Handle renaming the facet fields. Undecorate and all that.
+ from haystack import connections
+ revised_facets = {}
+ field_data = connections[self._using].get_unified_index().all_searchfields()
+
+ for facet_type, field_details in results.get('facets', {}).items():
+ temp_facets = {}
+
+ for field, field_facets in field_details.items():
+ fieldname = field
+ if field in field_data and hasattr(field_data[field], 'get_facet_for_name'):
+ fieldname = field_data[field].get_facet_for_name()
+
+ temp_facets[fieldname] = field_facets
+
+ revised_facets[facet_type] = temp_facets
+
+ return revised_facets
+
+ def using(self, using=None):
+ """
+ Allows for overriding which connection should be used. This
+ disables the use of routers when performing the query.
+
+ If ``None`` is provided, it has no effect on what backend is used.
+ """
+ return self._clone(using=using)
+
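``using()`` maps directly onto ``SearchQuerySet.using()``. A short sketch, assuming a second connection aliased ``'backup'`` is defined in ``HAYSTACK_CONNECTIONS`` (the alias is illustrative):

    from haystack.query import SearchQuerySet

    # Pin the query to the 'backup' connection, bypassing any routers.
    results = SearchQuerySet().using('backup').filter(content='django')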
+ def _reset(self):
+ """
+ Resets the instance's internal state to appear as though no query has
+ been run before. We only need to reset the few attributes that are checked.
+ """
+ self._results = None
+ self._hit_count = None
+ self._facet_counts = None
+ self._spelling_suggestion = None
+
+ def _clone(self, klass=None, using=None):
+ if using is None:
+ using = self._using
+ else:
+ from haystack import connections
+ klass = connections[using].query
+
+ if klass is None:
+ klass = self.__class__
+
+ clone = klass(using=using)
+ clone.query_filter = deepcopy(self.query_filter)
+ clone.order_by = self.order_by[:]
+ clone.models = self.models.copy()
+ clone.boost = self.boost.copy()
+ clone.highlight = self.highlight
+ clone.stats = self.stats.copy()
+ clone.facets = self.facets.copy()
+ clone.date_facets = self.date_facets.copy()
+ clone.query_facets = self.query_facets[:]
+ clone.narrow_queries = self.narrow_queries.copy()
+ clone.start_offset = self.start_offset
+ clone.end_offset = self.end_offset
+ clone.result_class = self.result_class
+ clone.within = self.within.copy()
+ clone.dwithin = self.dwithin.copy()
+ clone.distance_point = self.distance_point.copy()
+ clone._raw_query = self._raw_query
+ clone._raw_query_params = self._raw_query_params
+
+ return clone
+
+
+class BaseEngine(object):
+ backend = BaseSearchBackend
+ query = BaseSearchQuery
+ unified_index = UnifiedIndex
+
+ def __init__(self, using=None):
+ if using is None:
+ using = DEFAULT_ALIAS
+
+ self.using = using
+ self.options = settings.HAYSTACK_CONNECTIONS.get(self.using, {})
+ self.queries = []
+ self._index = None
+ self._backend = None
+
+ def get_backend(self):
+ if self._backend is None:
+ self._backend = self.backend(self.using, **self.options)
+ return self._backend
+
+ def get_query(self):
+ return self.query(using=self.using)
+
+ def reset_queries(self):
+ self.queries = []
+
+ def get_unified_index(self):
+ if self._index is None:
+ self._index = self.unified_index(self.options.get('EXCLUDED_INDEXES', []))
+
+ return self._index
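``BaseEngine`` is the piece a third-party backend subclasses to tie its backend and query classes together; the dotted path to the subclass is what goes into the ``ENGINE`` setting. A rough sketch of such a subclass (the ``My*`` names are placeholders, not part of this change):

    from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery


    class MySearchBackend(BaseSearchBackend):
        # A real backend would implement update(), remove(), clear() and search().
        pass


    class MySearchQuery(BaseSearchQuery):
        # A real query class would implement build_query_fragment() and friends.
        pass


    class MySearchEngine(BaseEngine):
        # The engine only wires the two classes together; connections[alias]
        # instantiates them lazily via get_backend() / get_query().
        backend = MySearchBackend
        query = MySearchQuery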
diff --git a/haystack/backends/elasticsearch_backend.py b/haystack/backends/elasticsearch_backend.py
new file mode 100644
index 0000000..a57bb12
--- /dev/null
+++ b/haystack/backends/elasticsearch_backend.py
@@ -0,0 +1,944 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import datetime
+import re
+import warnings
+
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+from django.utils import six
+
+import haystack
+from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query
+from haystack.constants import DEFAULT_OPERATOR, DJANGO_CT, DJANGO_ID, ID
+from haystack.exceptions import MissingDependency, MoreLikeThisError, SkipDocument
+from haystack.inputs import Clean, Exact, PythonData, Raw
+from haystack.models import SearchResult
+from haystack.utils import log as logging
+from haystack.utils import get_identifier, get_model_ct
+from haystack.utils.app_loading import haystack_get_model
+
+try:
+ import elasticsearch
+ from elasticsearch.helpers import bulk_index
+ from elasticsearch.exceptions import NotFoundError
+except ImportError:
+ raise MissingDependency("The 'elasticsearch' backend requires the installation of 'elasticsearch'. Please refer to the documentation.")
+
+
+DATETIME_REGEX = re.compile(
+ r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T'
+ r'(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d+)?$')
+
+
+class ElasticsearchSearchBackend(BaseSearchBackend):
+ # Words reserved by Elasticsearch for special use.
+ RESERVED_WORDS = (
+ 'AND',
+ 'NOT',
+ 'OR',
+ 'TO',
+ )
+
+ # Characters reserved by Elasticsearch for special use.
+ # The '\\' must come first, so as not to overwrite the other slash replacements.
+ RESERVED_CHARACTERS = (
+ '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
+ '[', ']', '^', '"', '~', '*', '?', ':', '/',
+ )
+
+ # Settings to add an n-gram & edge n-gram analyzer.
+ DEFAULT_SETTINGS = {
+ 'settings': {
+ "analysis": {
+ "analyzer": {
+ "ngram_analyzer": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": ["haystack_ngram", "lowercase"]
+ },
+ "edgengram_analyzer": {
+ "type": "custom",
+ "tokenizer": "standard",
+ "filter": ["haystack_edgengram", "lowercase"]
+ }
+ },
+ "tokenizer": {
+ "haystack_ngram_tokenizer": {
+ "type": "nGram",
+ "min_gram": 3,
+ "max_gram": 15,
+ },
+ "haystack_edgengram_tokenizer": {
+ "type": "edgeNGram",
+ "min_gram": 2,
+ "max_gram": 15,
+ "side": "front"
+ }
+ },
+ "filter": {
+ "haystack_ngram": {
+ "type": "nGram",
+ "min_gram": 3,
+ "max_gram": 15
+ },
+ "haystack_edgengram": {
+ "type": "edgeNGram",
+ "min_gram": 2,
+ "max_gram": 15
+ }
+ }
+ }
+ }
+ }
+
+ def __init__(self, connection_alias, **connection_options):
+ super(ElasticsearchSearchBackend, self).__init__(connection_alias, **connection_options)
+
+ if 'URL' not in connection_options:
+ raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias)
+
+ if 'INDEX_NAME' not in connection_options:
+ raise ImproperlyConfigured("You must specify an 'INDEX_NAME' in your settings for connection '%s'." % connection_alias)
+
+ self.conn = elasticsearch.Elasticsearch(connection_options['URL'], timeout=self.timeout, **connection_options.get('KWARGS', {}))
+ self.index_name = connection_options['INDEX_NAME']
+ self.log = logging.getLogger('haystack')
+ self.setup_complete = False
+ self.existing_mapping = {}
+
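For context, a connection for this backend is typically configured like the following (the URL, index name and extra kwargs are illustrative values, not part of this change):

    HAYSTACK_CONNECTIONS = {
        'default': {
            'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
            'URL': 'http://127.0.0.1:9200/',
            'INDEX_NAME': 'haystack',
            # Anything under KWARGS is passed straight through to
            # elasticsearch.Elasticsearch(); max_retries is one example.
            'KWARGS': {'max_retries': 2},
        },
    }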
+ def setup(self):
+ """
+ Defers loading until needed.
+ """
+ # Get the existing mapping & cache it. We'll compare it
+ # during the ``update`` & if it doesn't match, we'll put the new
+ # mapping.
+ try:
+ self.existing_mapping = self.conn.indices.get_mapping(index=self.index_name)
+ except NotFoundError:
+ pass
+ except Exception:
+ if not self.silently_fail:
+ raise
+
+ unified_index = haystack.connections[self.connection_alias].get_unified_index()
+ self.content_field_name, field_mapping = self.build_schema(unified_index.all_searchfields())
+ current_mapping = {
+ 'modelresult': {
+ 'properties': field_mapping,
+ '_boost': {
+ 'name': 'boost',
+ 'null_value': 1.0
+ }
+ }
+ }
+
+ if current_mapping != self.existing_mapping:
+ try:
+ # Make sure the index is there first.
+ self.conn.indices.create(index=self.index_name, body=self.DEFAULT_SETTINGS, ignore=400)
+ self.conn.indices.put_mapping(index=self.index_name, doc_type='modelresult', body=current_mapping)
+ self.existing_mapping = current_mapping
+ except Exception:
+ if not self.silently_fail:
+ raise
+
+ self.setup_complete = True
+
+ def update(self, index, iterable, commit=True):
+ if not self.setup_complete:
+ try:
+ self.setup()
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to add documents to Elasticsearch: %s", e)
+ return
+
+ prepped_docs = []
+
+ for obj in iterable:
+ try:
+ prepped_data = index.full_prepare(obj)
+ final_data = {}
+
+ # Convert the data to make sure it's happy.
+ for key, value in prepped_data.items():
+ final_data[key] = self._from_python(value)
+ final_data['_id'] = final_data[ID]
+
+ prepped_docs.append(final_data)
+ except SkipDocument:
+ self.log.debug(u"Indexing for object `%s` skipped", obj)
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ # We'll log the object identifier but won't include the actual object
+ # to avoid the possibility of that generating encoding errors while
+ # processing the log message:
+ self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={
+ "data": {
+ "index": index,
+ "object": get_identifier(obj)
+ }
+ })
+
+ bulk_index(self.conn, prepped_docs, index=self.index_name, doc_type='modelresult')
+
+ if commit:
+ self.conn.indices.refresh(index=self.index_name)
+
+ def remove(self, obj_or_string, commit=True):
+ doc_id = get_identifier(obj_or_string)
+
+ if not self.setup_complete:
+ try:
+ self.setup()
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e)
+ return
+
+ try:
+ self.conn.delete(index=self.index_name, doc_type='modelresult', id=doc_id, ignore=404)
+
+ if commit:
+ self.conn.indices.refresh(index=self.index_name)
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to remove document '%s' from Elasticsearch: %s", doc_id, e)
+
+ def clear(self, models=[], commit=True):
+ # We actually don't want to do this here, as mappings could be
+ # very different.
+ # if not self.setup_complete:
+ # self.setup()
+
+ try:
+ if not models:
+ self.conn.indices.delete(index=self.index_name, ignore=404)
+ self.setup_complete = False
+ self.existing_mapping = {}
+ else:
+ models_to_delete = []
+
+ for model in models:
+ models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model)))
+
+ # Delete by query in Elasticsearch assumes you're dealing with
+ # a ``query`` root object. :/
+ query = {'query': {'query_string': {'query': " OR ".join(models_to_delete)}}}
+ self.conn.delete_by_query(index=self.index_name, doc_type='modelresult', body=query)
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ if len(models):
+ self.log.error("Failed to clear Elasticsearch index of models '%s': %s", ','.join(models_to_delete), e)
+ else:
+ self.log.error("Failed to clear Elasticsearch index: %s", e)
+
+ def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+ fields='', highlight=False, facets=None,
+ date_facets=None, query_facets=None,
+ narrow_queries=None, spelling_query=None,
+ within=None, dwithin=None, distance_point=None,
+ models=None, limit_to_registered_models=None,
+ result_class=None):
+ index = haystack.connections[self.connection_alias].get_unified_index()
+ content_field = index.document_field
+
+ if query_string == '*:*':
+ kwargs = {
+ 'query': {
+ "match_all": {}
+ },
+ }
+ else:
+ kwargs = {
+ 'query': {
+ 'query_string': {
+ 'default_field': content_field,
+ 'default_operator': DEFAULT_OPERATOR,
+ 'query': query_string,
+ 'analyze_wildcard': True,
+ 'auto_generate_phrase_queries': True,
+ },
+ },
+ }
+
+ # so far, no filters
+ filters = []
+
+ if fields:
+ if isinstance(fields, (list, set)):
+ fields = " ".join(fields)
+
+ kwargs['fields'] = fields
+
+ if sort_by is not None:
+ order_list = []
+ for field, direction in sort_by:
+ if field == 'distance' and distance_point:
+ # Do the geo-enabled sort.
+ lng, lat = distance_point['point'].get_coords()
+ sort_kwargs = {
+ "_geo_distance": {
+ distance_point['field']: [lng, lat],
+ "order": direction,
+ "unit": "km"
+ }
+ }
+ else:
+ if field == 'distance':
+ warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
+
+ # Regular sorting.
+ sort_kwargs = {field: {'order': direction}}
+
+ order_list.append(sort_kwargs)
+
+ kwargs['sort'] = order_list
+
+ # From/size offsets don't seem to work right in Elasticsearch's DSL. :/
+ # if start_offset is not None:
+ # kwargs['from'] = start_offset
+
+ # if end_offset is not None:
+ # kwargs['size'] = end_offset - start_offset
+
+ if highlight is True:
+ kwargs['highlight'] = {
+ 'fields': {
+ content_field: {'store': 'yes'},
+ }
+ }
+
+ if self.include_spelling:
+ kwargs['suggest'] = {
+ 'suggest': {
+ 'text': spelling_query or query_string,
+ 'term': {
+ # Using content_field here will result in suggestions of stemmed words.
+ 'field': '_all',
+ },
+ },
+ }
+
+ if narrow_queries is None:
+ narrow_queries = set()
+
+ if facets is not None:
+ kwargs.setdefault('facets', {})
+
+ for facet_fieldname, extra_options in facets.items():
+ facet_options = {
+ 'terms': {
+ 'field': facet_fieldname,
+ 'size': 100,
+ },
+ }
+ # Special cases for options applied at the facet level (not the terms level).
+ if extra_options.pop('global_scope', False):
+ # Renamed "global_scope" since "global" is a python keyword.
+ facet_options['global'] = True
+ if 'facet_filter' in extra_options:
+ facet_options['facet_filter'] = extra_options.pop('facet_filter')
+ facet_options['terms'].update(extra_options)
+ kwargs['facets'][facet_fieldname] = facet_options
+
+ if date_facets is not None:
+ kwargs.setdefault('facets', {})
+
+ for facet_fieldname, value in date_facets.items():
+ # Need to detect on gap_by & only add amount if it's more than one.
+ interval = value.get('gap_by').lower()
+
+ # Need to detect on amount (can't be applied on months or years).
+ if value.get('gap_amount', 1) != 1 and interval not in ('month', 'year'):
+ # Just the first character is valid for use.
+ interval = "%s%s" % (value['gap_amount'], interval[:1])
+
+ kwargs['facets'][facet_fieldname] = {
+ 'date_histogram': {
+ 'field': facet_fieldname,
+ 'interval': interval,
+ },
+ 'facet_filter': {
+ "range": {
+ facet_fieldname: {
+ 'from': self._from_python(value.get('start_date')),
+ 'to': self._from_python(value.get('end_date')),
+ }
+ }
+ }
+ }
+
+ if query_facets is not None:
+ kwargs.setdefault('facets', {})
+
+ for facet_fieldname, value in query_facets:
+ kwargs['facets'][facet_fieldname] = {
+ 'query': {
+ 'query_string': {
+ 'query': value,
+ }
+ },
+ }
+
+ if limit_to_registered_models is None:
+ limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+ if models and len(models):
+ model_choices = sorted(get_model_ct(model) for model in models)
+ elif limit_to_registered_models:
+ # Using narrow queries, limit the results to only models handled
+ # with the current routers.
+ model_choices = self.build_models_list()
+ else:
+ model_choices = []
+
+ if len(model_choices) > 0:
+ filters.append({"terms": {DJANGO_CT: model_choices}})
+
+ for q in narrow_queries:
+ filters.append({
+ 'fquery': {
+ 'query': {
+ 'query_string': {
+ 'query': q
+ },
+ },
+ '_cache': True,
+ }
+ })
+
+ if within is not None:
+ from haystack.utils.geo import generate_bounding_box
+
+ ((south, west), (north, east)) = generate_bounding_box(within['point_1'], within['point_2'])
+ within_filter = {
+ "geo_bounding_box": {
+ within['field']: {
+ "top_left": {
+ "lat": north,
+ "lon": west
+ },
+ "bottom_right": {
+ "lat": south,
+ "lon": east
+ }
+ }
+ },
+ }
+ filters.append(within_filter)
+
+ if dwithin is not None:
+ lng, lat = dwithin['point'].get_coords()
+
+ # NB: the 1.0.0 release of elasticsearch introduced an
+ # incompatible change in the distance filter formatting.
+ if elasticsearch.VERSION >= (1, 0, 0):
+ distance = "%(dist).6f%(unit)s" % {
+ 'dist': dwithin['distance'].km,
+ 'unit': "km"
+ }
+ else:
+ distance = dwithin['distance'].km
+
+ dwithin_filter = {
+ "geo_distance": {
+ "distance": distance,
+ dwithin['field']: {
+ "lat": lat,
+ "lon": lng
+ }
+ }
+ }
+ filters.append(dwithin_filter)
+
+ # If we want to filter, change the query type to filtered.
+ if filters:
+ kwargs["query"] = {"filtered": {"query": kwargs.pop("query")}}
+ if len(filters) == 1:
+ kwargs['query']['filtered']["filter"] = filters[0]
+ else:
+ kwargs['query']['filtered']["filter"] = {"bool": {"must": filters}}
+
+ return kwargs
+
+ @log_query
+ def search(self, query_string, **kwargs):
+ if len(query_string) == 0:
+ return {
+ 'results': [],
+ 'hits': 0,
+ }
+
+ if not self.setup_complete:
+ self.setup()
+
+ search_kwargs = self.build_search_kwargs(query_string, **kwargs)
+ search_kwargs['from'] = kwargs.get('start_offset', 0)
+
+ order_fields = set()
+ for order in search_kwargs.get('sort', []):
+ for key in order.keys():
+ order_fields.add(key)
+
+ geo_sort = '_geo_distance' in order_fields
+
+ end_offset = kwargs.get('end_offset')
+ start_offset = kwargs.get('start_offset', 0)
+ if end_offset is not None and end_offset > start_offset:
+ search_kwargs['size'] = end_offset - start_offset
+
+ try:
+ raw_results = self.conn.search(body=search_kwargs,
+ index=self.index_name,
+ doc_type='modelresult',
+ _source=True)
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to query Elasticsearch using '%s': %s", query_string, e)
+ raw_results = {}
+
+ return self._process_results(raw_results,
+ highlight=kwargs.get('highlight'),
+ result_class=kwargs.get('result_class', SearchResult),
+ distance_point=kwargs.get('distance_point'),
+ geo_sort=geo_sort)
+
+ def more_like_this(self, model_instance, additional_query_string=None,
+ start_offset=0, end_offset=None, models=None,
+ limit_to_registered_models=None, result_class=None, **kwargs):
+ from haystack import connections
+
+ if not self.setup_complete:
+ self.setup()
+
+ # Deferred models will have a different class ("RealClass_Deferred_fieldname")
+ # which won't be in our registry:
+ model_klass = model_instance._meta.concrete_model
+
+ index = connections[self.connection_alias].get_unified_index().get_index(model_klass)
+ field_name = index.get_content_field()
+ params = {}
+
+ if start_offset is not None:
+ params['search_from'] = start_offset
+
+ if end_offset is not None:
+ params['search_size'] = end_offset - start_offset
+
+ doc_id = get_identifier(model_instance)
+
+ try:
+ raw_results = self.conn.mlt(index=self.index_name, doc_type='modelresult', id=doc_id, mlt_fields=[field_name], **params)
+ except elasticsearch.TransportError as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to fetch More Like This from Elasticsearch for document '%s': %s", doc_id, e)
+ raw_results = {}
+
+ return self._process_results(raw_results, result_class=result_class)
+
+ def _process_results(self, raw_results, highlight=False,
+ result_class=None, distance_point=None,
+ geo_sort=False):
+ from haystack import connections
+ results = []
+ hits = raw_results.get('hits', {}).get('total', 0)
+ facets = {}
+ spelling_suggestion = None
+
+ if result_class is None:
+ result_class = SearchResult
+
+ if self.include_spelling and 'suggest' in raw_results:
+ raw_suggest = raw_results['suggest'].get('suggest')
+ if raw_suggest:
+ spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest])
+
+ if 'facets' in raw_results:
+ facets = {
+ 'fields': {},
+ 'dates': {},
+ 'queries': {},
+ }
+
+ for facet_fieldname, facet_info in raw_results['facets'].items():
+ if facet_info.get('_type', 'terms') == 'terms':
+ facets['fields'][facet_fieldname] = [(individual['term'], individual['count']) for individual in facet_info['terms']]
+ elif facet_info.get('_type', 'terms') == 'date_histogram':
+ # Elasticsearch provides UTC timestamps with an extra three
+ # decimals of precision, which datetime barfs on.
+ facets['dates'][facet_fieldname] = [(datetime.datetime.utcfromtimestamp(individual['time'] / 1000), individual['count']) for individual in facet_info['entries']]
+ elif facet_info.get('_type', 'terms') == 'query':
+ facets['queries'][facet_fieldname] = facet_info['count']
+
+ unified_index = connections[self.connection_alias].get_unified_index()
+ indexed_models = unified_index.get_indexed_models()
+ content_field = unified_index.document_field
+
+ for raw_result in raw_results.get('hits', {}).get('hits', []):
+ source = raw_result['_source']
+ app_label, model_name = source[DJANGO_CT].split('.')
+ additional_fields = {}
+ model = haystack_get_model(app_label, model_name)
+
+ if model and model in indexed_models:
+ for key, value in source.items():
+ index = unified_index.get_index(model)
+ string_key = str(key)
+
+ if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
+ additional_fields[string_key] = index.fields[string_key].convert(value)
+ else:
+ additional_fields[string_key] = self._to_python(value)
+
+ del(additional_fields[DJANGO_CT])
+ del(additional_fields[DJANGO_ID])
+
+ if 'highlight' in raw_result:
+ additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '')
+
+ if distance_point:
+ additional_fields['_point_of_origin'] = distance_point
+
+ if geo_sort and raw_result.get('sort'):
+ from haystack.utils.geo import Distance
+ additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
+ else:
+ additional_fields['_distance'] = None
+
+ result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
+ results.append(result)
+ else:
+ hits -= 1
+
+ return {
+ 'results': results,
+ 'hits': hits,
+ 'facets': facets,
+ 'spelling_suggestion': spelling_suggestion,
+ }
+
+ def build_schema(self, fields):
+ content_field_name = ''
+ mapping = {
+ DJANGO_CT: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
+ DJANGO_ID: {'type': 'string', 'index': 'not_analyzed', 'include_in_all': False},
+ }
+
+ for field_name, field_class in fields.items():
+ field_mapping = FIELD_MAPPINGS.get(field_class.field_type, DEFAULT_FIELD_MAPPING).copy()
+ if field_class.boost != 1.0:
+ field_mapping['boost'] = field_class.boost
+
+ if field_class.document is True:
+ content_field_name = field_class.index_fieldname
+
+ # Do this last to override `text` fields.
+ if field_mapping['type'] == 'string':
+ if field_class.indexed is False or hasattr(field_class, 'facet_for'):
+ field_mapping['index'] = 'not_analyzed'
+ del field_mapping['analyzer']
+
+ mapping[field_class.index_fieldname] = field_mapping
+
+ return (content_field_name, mapping)
+
+ def _iso_datetime(self, value):
+ """
+ If value appears to be something datetime-like, return it in ISO format.
+
+ Otherwise, return None.
+ """
+ if hasattr(value, 'strftime'):
+ if hasattr(value, 'hour'):
+ return value.isoformat()
+ else:
+ return '%sT00:00:00' % value.isoformat()
+
+ def _from_python(self, value):
+ """Convert more Python data types to ES-understandable JSON."""
+ iso = self._iso_datetime(value)
+ if iso:
+ return iso
+ elif isinstance(value, six.binary_type):
+ # TODO: Be stricter.
+ return six.text_type(value, errors='replace')
+ elif isinstance(value, set):
+ return list(value)
+ return value
+
+ def _to_python(self, value):
+ """Convert values from ElasticSearch to native Python values."""
+ if isinstance(value, (int, float, complex, list, tuple, bool)):
+ return value
+
+ if isinstance(value, six.string_types):
+ possible_datetime = DATETIME_REGEX.search(value)
+
+ if possible_datetime:
+ date_values = possible_datetime.groupdict()
+
+ for dk, dv in date_values.items():
+ date_values[dk] = int(dv)
+
+ return datetime.datetime(
+ date_values['year'], date_values['month'],
+ date_values['day'], date_values['hour'],
+ date_values['minute'], date_values['second'])
+
+ try:
+ # This is slightly gross but it's hard to tell otherwise what the
+ # string's original type might have been. Be careful who you trust.
+ converted_value = eval(value)
+
+ # Try to handle most built-in types.
+ if isinstance(
+ converted_value,
+ (int, list, tuple, set, dict, float, complex)):
+ return converted_value
+ except Exception:
+ # If it fails (SyntaxError or its ilk) or we don't trust it,
+ # continue on.
+ pass
+
+ return value
+
+# DRL_FIXME: Perhaps move to something where, if none of these
+# match, call a custom method on the form that returns, per-backend,
+# the right type of storage?
+DEFAULT_FIELD_MAPPING = {'type': 'string', 'analyzer': 'snowball'}
+FIELD_MAPPINGS = {
+ 'edge_ngram': {'type': 'string', 'analyzer': 'edgengram_analyzer'},
+ 'ngram': {'type': 'string', 'analyzer': 'ngram_analyzer'},
+ 'date': {'type': 'date'},
+ 'datetime': {'type': 'date'},
+
+ 'location': {'type': 'geo_point'},
+ 'boolean': {'type': 'boolean'},
+ 'float': {'type': 'float'},
+ 'long': {'type': 'long'},
+ 'integer': {'type': 'long'},
+}
+
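To illustrate how these mappings get used by ``build_schema()``, a sketch of a ``SearchIndex`` and the mapping each field would receive (the index, the field names and the ``Note`` model are hypothetical):

    from haystack import indexes


    class NoteIndex(indexes.SearchIndex, indexes.Indexable):
        # Document field: falls back to DEFAULT_FIELD_MAPPING, i.e.
        # {'type': 'string', 'analyzer': 'snowball'}.
        text = indexes.CharField(document=True, use_template=True)

        # 'edge_ngram' maps to {'type': 'string', 'analyzer': 'edgengram_analyzer'}.
        title_auto = indexes.EdgeNgramField(model_attr='title')

        # 'datetime' maps to {'type': 'date'}.
        pub_date = indexes.DateTimeField(model_attr='pub_date')

        def get_model(self):
            from myapp.models import Note  # hypothetical model
            return Note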
+
+# Sucks that this is almost an exact copy of what's in the Solr backend,
+# but we can't import due to dependencies.
+class ElasticsearchSearchQuery(BaseSearchQuery):
+ def matching_all_fragment(self):
+ return '*:*'
+
+ def build_query_fragment(self, field, filter_type, value):
+ from haystack import connections
+ query_frag = ''
+
+ if not hasattr(value, 'input_type_name'):
+ # Handle when we've got a ``ValuesListQuerySet``...
+ if hasattr(value, 'values_list'):
+ value = list(value)
+
+ if isinstance(value, six.string_types):
+ # It's not an ``InputType``. Assume ``Clean``.
+ value = Clean(value)
+ else:
+ value = PythonData(value)
+
+ # Prepare the query using the InputType.
+ prepared_value = value.prepare(self)
+
+ if not isinstance(prepared_value, (set, list, tuple)):
+ # Then convert whatever we get back to what the backend wants if needed.
+ prepared_value = self.backend._from_python(prepared_value)
+
+ # 'content' is a special reserved word, much like 'pk' in
+ # Django's ORM layer. It indicates 'no special field'.
+ if field == 'content':
+ index_fieldname = ''
+ else:
+ index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)
+
+ filter_types = {
+ 'contains': u'%s',
+ 'startswith': u'%s*',
+ 'exact': u'%s',
+ 'gt': u'{%s TO *}',
+ 'gte': u'[%s TO *]',
+ 'lt': u'{* TO %s}',
+ 'lte': u'[* TO %s]',
+ }
+
+ if value.post_process is False:
+ query_frag = prepared_value
+ else:
+ if filter_type in ['contains', 'startswith']:
+ if value.input_type_name == 'exact':
+ query_frag = prepared_value
+ else:
+ # Iterate over terms & incorporate the converted form of each into the query.
+ terms = []
+
+ if isinstance(prepared_value, six.string_types):
+ for possible_value in prepared_value.split(' '):
+ terms.append(filter_types[filter_type] % self.backend._from_python(possible_value))
+ else:
+ terms.append(filter_types[filter_type] % self.backend._from_python(prepared_value))
+
+ if len(terms) == 1:
+ query_frag = terms[0]
+ else:
+ query_frag = u"(%s)" % " AND ".join(terms)
+ elif filter_type == 'in':
+ in_options = []
+
+ for possible_value in prepared_value:
+ in_options.append(u'"%s"' % self.backend._from_python(possible_value))
+
+ query_frag = u"(%s)" % " OR ".join(in_options)
+ elif filter_type == 'range':
+ start = self.backend._from_python(prepared_value[0])
+ end = self.backend._from_python(prepared_value[1])
+ query_frag = u'["%s" TO "%s"]' % (start, end)
+ elif filter_type == 'exact':
+ if value.input_type_name == 'exact':
+ query_frag = prepared_value
+ else:
+ prepared_value = Exact(prepared_value).prepare(self)
+ query_frag = filter_types[filter_type] % prepared_value
+ else:
+ if value.input_type_name != 'exact':
+ prepared_value = Exact(prepared_value).prepare(self)
+
+ query_frag = filter_types[filter_type] % prepared_value
+
+ if len(query_frag) and not isinstance(value, Raw):
+ if not query_frag.startswith('(') and not query_frag.endswith(')'):
+ query_frag = "(%s)" % query_frag
+
+ return u"%s%s" % (index_fieldname, query_frag)
+
+ def build_alt_parser_query(self, parser_name, query_string='', **kwargs):
+ if query_string:
+ kwargs['v'] = query_string
+
+ kwarg_bits = []
+
+ for key in sorted(kwargs.keys()):
+ if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]:
+ kwarg_bits.append(u"%s='%s'" % (key, kwargs[key]))
+ else:
+ kwarg_bits.append(u"%s=%s" % (key, kwargs[key]))
+
+ return u"{!%s %s}" % (parser_name, ' '.join(kwarg_bits))
+
+ def build_params(self, spelling_query=None, **kwargs):
+ search_kwargs = {
+ 'start_offset': self.start_offset,
+ 'result_class': self.result_class
+ }
+ order_by_list = None
+
+ if self.order_by:
+ if order_by_list is None:
+ order_by_list = []
+
+ for field in self.order_by:
+ direction = 'asc'
+ if field.startswith('-'):
+ direction = 'desc'
+ field = field[1:]
+ order_by_list.append((field, direction))
+
+ search_kwargs['sort_by'] = order_by_list
+
+ if self.date_facets:
+ search_kwargs['date_facets'] = self.date_facets
+
+ if self.distance_point:
+ search_kwargs['distance_point'] = self.distance_point
+
+ if self.dwithin:
+ search_kwargs['dwithin'] = self.dwithin
+
+ if self.end_offset is not None:
+ search_kwargs['end_offset'] = self.end_offset
+
+ if self.facets:
+ search_kwargs['facets'] = self.facets
+
+ if self.fields:
+ search_kwargs['fields'] = self.fields
+
+ if self.highlight:
+ search_kwargs['highlight'] = self.highlight
+
+ if self.models:
+ search_kwargs['models'] = self.models
+
+ if self.narrow_queries:
+ search_kwargs['narrow_queries'] = self.narrow_queries
+
+ if self.query_facets:
+ search_kwargs['query_facets'] = self.query_facets
+
+ if self.within:
+ search_kwargs['within'] = self.within
+
+ if spelling_query:
+ search_kwargs['spelling_query'] = spelling_query
+
+ return search_kwargs
+
+ def run(self, spelling_query=None, **kwargs):
+ """Builds and executes the query. Returns a list of search results."""
+ final_query = self.build_query()
+ search_kwargs = self.build_params(spelling_query, **kwargs)
+
+ if kwargs:
+ search_kwargs.update(kwargs)
+
+ results = self.backend.search(final_query, **search_kwargs)
+ self._results = results.get('results', [])
+ self._hit_count = results.get('hits', 0)
+ self._facet_counts = self.post_process_facets(results)
+ self._spelling_suggestion = results.get('spelling_suggestion', None)
+
+ def run_mlt(self, **kwargs):
+ """Builds and executes the query. Returns a list of search results."""
+ if self._more_like_this is False or self._mlt_instance is None:
+ raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.")
+
+ additional_query_string = self.build_query()
+ search_kwargs = {
+ 'start_offset': self.start_offset,
+ 'result_class': self.result_class,
+ 'models': self.models
+ }
+
+ if self.end_offset is not None:
+ search_kwargs['end_offset'] = self.end_offset - self.start_offset
+
+ results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs)
+ self._results = results.get('results', [])
+ self._hit_count = results.get('hits', 0)
+
+
+class ElasticsearchSearchEngine(BaseEngine):
+ backend = ElasticsearchSearchBackend
+ query = ElasticsearchSearchQuery
diff --git a/haystack/backends/simple_backend.py b/haystack/backends/simple_backend.py
new file mode 100644
index 0000000..ff7402e
--- /dev/null
+++ b/haystack/backends/simple_backend.py
@@ -0,0 +1,135 @@
+# encoding: utf-8
+"""
+A very basic, ORM-based backend for simple search during tests.
+"""
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from warnings import warn
+
+from django.conf import settings
+from django.db.models import Q
+from django.utils import six
+
+from haystack import connections
+from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query, SearchNode
+from haystack.inputs import PythonData
+from haystack.models import SearchResult
+from haystack.utils import get_model_ct_tuple
+
+if settings.DEBUG:
+ import logging
+
+ class NullHandler(logging.Handler):
+ def emit(self, record):
+ pass
+
+ ch = logging.StreamHandler()
+ ch.setLevel(logging.WARNING)
+ ch.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
+
+ logger = logging.getLogger('haystack.simple_backend')
+ logger.setLevel(logging.WARNING)
+ logger.addHandler(NullHandler())
+ logger.addHandler(ch)
+else:
+ logger = None
+
+
+class SimpleSearchBackend(BaseSearchBackend):
+ def update(self, indexer, iterable, commit=True):
+ warn('update is not implemented in this backend')
+
+ def remove(self, obj, commit=True):
+ warn('remove is not implemented in this backend')
+
+ def clear(self, models=[], commit=True):
+ warn('clear is not implemented in this backend')
+
+ @log_query
+ def search(self, query_string, **kwargs):
+ hits = 0
+ results = []
+ result_class = SearchResult
+ models = connections[self.connection_alias].get_unified_index().get_indexed_models()
+
+ if kwargs.get('result_class'):
+ result_class = kwargs['result_class']
+
+ if kwargs.get('models'):
+ models = kwargs['models']
+
+ if query_string:
+ for model in models:
+ if query_string == '*':
+ qs = model.objects.all()
+ else:
+ for term in query_string.split():
+ queries = []
+
+ for field in model._meta.fields:
+ if hasattr(field, 'related'):
+ continue
+
+ if not field.get_internal_type() in ('TextField', 'CharField', 'SlugField'):
+ continue
+
+ queries.append(Q(**{'%s__icontains' % field.name: term}))
+
+ qs = model.objects.filter(six.moves.reduce(lambda x, y: x | y, queries))
+
+ hits += len(qs)
+
+ for match in qs:
+ match.__dict__.pop('score', None)
+ app_label, model_name = get_model_ct_tuple(match)
+ result = result_class(app_label, model_name, match.pk, 0, **match.__dict__)
+ # For efficiency.
+ result._model = match.__class__
+ result._object = match
+ results.append(result)
+
+ return {
+ 'results': results,
+ 'hits': hits,
+ }
+
+ def prep_value(self, db_field, value):
+ return value
+
+ def more_like_this(self, model_instance, additional_query_string=None,
+ start_offset=0, end_offset=None,
+ limit_to_registered_models=None, result_class=None, **kwargs):
+ return {
+ 'results': [],
+ 'hits': 0
+ }
+
+
+class SimpleSearchQuery(BaseSearchQuery):
+ def build_query(self):
+ if not self.query_filter:
+ return '*'
+
+ return self._build_sub_query(self.query_filter)
+
+ def _build_sub_query(self, search_node):
+ term_list = []
+
+ for child in search_node.children:
+ if isinstance(child, SearchNode):
+ term_list.append(self._build_sub_query(child))
+ else:
+ value = child[1]
+
+ if not hasattr(value, 'input_type_name'):
+ value = PythonData(value)
+
+ term_list.append(value.prepare(self))
+
+ return (' ').join(map(six.text_type, term_list))
+
+
+class SimpleEngine(BaseEngine):
+ backend = SimpleSearchBackend
+ query = SimpleSearchQuery
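Since this backend queries the ORM directly, enabling it needs nothing beyond the engine path; a minimal sketch:

    HAYSTACK_CONNECTIONS = {
        'default': {
            'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
        },
    }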
diff --git a/haystack/backends/solr_backend.py b/haystack/backends/solr_backend.py
new file mode 100644
index 0000000..4f301c9
--- /dev/null
+++ b/haystack/backends/solr_backend.py
@@ -0,0 +1,718 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import warnings
+
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+from django.utils import six
+
+from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
+from haystack.constants import DJANGO_CT, DJANGO_ID, ID
+from haystack.exceptions import MissingDependency, MoreLikeThisError, SkipDocument
+from haystack.inputs import Clean, Exact, PythonData, Raw
+from haystack.models import SearchResult
+from haystack.utils import log as logging
+from haystack.utils import get_identifier, get_model_ct
+from haystack.utils.app_loading import haystack_get_model
+
+try:
+ from pysolr import Solr, SolrError
+except ImportError:
+ raise MissingDependency("The 'solr' backend requires the installation of 'pysolr'. Please refer to the documentation.")
+
+
+class SolrSearchBackend(BaseSearchBackend):
+ # Word reserved by Solr for special use.
+ RESERVED_WORDS = (
+ 'AND',
+ 'NOT',
+ 'OR',
+ 'TO',
+ )
+
+ # Characters reserved by Solr for special use.
+ # The '\\' must come first, so as not to overwrite the other slash replacements.
+ RESERVED_CHARACTERS = (
+ '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
+ '[', ']', '^', '"', '~', '*', '?', ':', '/',
+ )
+
+ def __init__(self, connection_alias, **connection_options):
+ super(SolrSearchBackend, self).__init__(connection_alias, **connection_options)
+
+ if 'URL' not in connection_options:
+ raise ImproperlyConfigured("You must specify a 'URL' in your settings for connection '%s'." % connection_alias)
+
+ self.conn = Solr(connection_options['URL'], timeout=self.timeout, **connection_options.get('KWARGS', {}))
+ self.log = logging.getLogger('haystack')
+
+ def update(self, index, iterable, commit=True):
+ docs = []
+
+ for obj in iterable:
+ try:
+ docs.append(index.full_prepare(obj))
+ except SkipDocument:
+ self.log.debug(u"Indexing for object `%s` skipped", obj)
+ except UnicodeDecodeError:
+ if not self.silently_fail:
+ raise
+
+ # We'll log the object identifier but won't include the actual object
+ # to avoid the possibility of that generating encoding errors while
+ # processing the log message:
+ self.log.error(u"UnicodeDecodeError while preparing object for update", exc_info=True, extra={
+ "data": {
+ "index": index,
+ "object": get_identifier(obj)
+ }
+ })
+
+ if len(docs) > 0:
+ try:
+ self.conn.add(docs, commit=commit, boost=index.get_field_weights())
+ except (IOError, SolrError) as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to add documents to Solr: %s", e)
+
+ def remove(self, obj_or_string, commit=True):
+ solr_id = get_identifier(obj_or_string)
+
+ try:
+ kwargs = {
+ 'commit': commit,
+ 'id': solr_id
+ }
+ self.conn.delete(**kwargs)
+ except (IOError, SolrError) as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to remove document '%s' from Solr: %s", solr_id, e)
+
+ def clear(self, models=[], commit=True):
+ try:
+ if not models:
+ # *:* matches all docs in Solr
+ self.conn.delete(q='*:*', commit=commit)
+ else:
+ models_to_delete = []
+
+ for model in models:
+ models_to_delete.append("%s:%s" % (DJANGO_CT, get_model_ct(model)))
+
+ self.conn.delete(q=" OR ".join(models_to_delete), commit=commit)
+
+ if commit:
+ # Run an optimize post-clear. http://wiki.apache.org/solr/FAQ#head-9aafb5d8dff5308e8ea4fcf4b71f19f029c4bb99
+ self.conn.optimize()
+ except (IOError, SolrError) as e:
+ if not self.silently_fail:
+ raise
+
+ if len(models):
+ self.log.error("Failed to clear Solr index of models '%s': %s", ','.join(models_to_delete), e)
+ else:
+ self.log.error("Failed to clear Solr index: %s", e)
+
+ @log_query
+ def search(self, query_string, **kwargs):
+ if len(query_string) == 0:
+ return {
+ 'results': [],
+ 'hits': 0,
+ }
+
+ search_kwargs = self.build_search_kwargs(query_string, **kwargs)
+
+ try:
+ raw_results = self.conn.search(query_string, **search_kwargs)
+ except (IOError, SolrError) as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to query Solr using '%s': %s", query_string, e)
+ raw_results = EmptyResults()
+
+ return self._process_results(raw_results, highlight=kwargs.get('highlight'), result_class=kwargs.get('result_class', SearchResult), distance_point=kwargs.get('distance_point'))
+
+ def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+ fields='', highlight=False, facets=None,
+ date_facets=None, query_facets=None,
+ narrow_queries=None, spelling_query=None,
+ within=None, dwithin=None, distance_point=None,
+ models=None, limit_to_registered_models=None,
+ result_class=None, stats=None):
+ kwargs = {'fl': '* score'}
+
+ if fields:
+ if isinstance(fields, (list, set)):
+ fields = " ".join(fields)
+
+ kwargs['fl'] = fields
+
+ if sort_by is not None:
+ if sort_by in ['distance asc', 'distance desc'] and distance_point:
+ # Do the geo-enabled sort.
+ lng, lat = distance_point['point'].get_coords()
+ kwargs['sfield'] = distance_point['field']
+ kwargs['pt'] = '%s,%s' % (lat, lng)
+
+ if sort_by == 'distance asc':
+ kwargs['sort'] = 'geodist() asc'
+ else:
+ kwargs['sort'] = 'geodist() desc'
+ else:
+ if sort_by.startswith('distance '):
+ warnings.warn("In order to sort by distance, you must call the '.distance(...)' method.")
+
+ # Regular sorting.
+ kwargs['sort'] = sort_by
+
+ if start_offset is not None:
+ kwargs['start'] = start_offset
+
+ if end_offset is not None:
+ kwargs['rows'] = end_offset - start_offset
+
+ if highlight is True:
+ kwargs['hl'] = 'true'
+ kwargs['hl.fragsize'] = '200'
+
+ if self.include_spelling is True:
+ kwargs['spellcheck'] = 'true'
+ kwargs['spellcheck.collate'] = 'true'
+ kwargs['spellcheck.count'] = 1
+
+ if spelling_query:
+ kwargs['spellcheck.q'] = spelling_query
+
+ if facets is not None:
+ kwargs['facet'] = 'on'
+ kwargs['facet.field'] = facets.keys()
+
+ for facet_field, options in facets.items():
+ for key, value in options.items():
+ kwargs['f.%s.facet.%s' % (facet_field, key)] = self.conn._from_python(value)
+
+ if date_facets is not None:
+ kwargs['facet'] = 'on'
+ kwargs['facet.date'] = date_facets.keys()
+ kwargs['facet.date.other'] = 'none'
+
+ for key, value in date_facets.items():
+ kwargs["f.%s.facet.date.start" % key] = self.conn._from_python(value.get('start_date'))
+ kwargs["f.%s.facet.date.end" % key] = self.conn._from_python(value.get('end_date'))
+ gap_by_string = value.get('gap_by').upper()
+ gap_string = "%d%s" % (value.get('gap_amount'), gap_by_string)
+
+ if value.get('gap_amount') != 1:
+ gap_string += "S"
+
+ kwargs["f.%s.facet.date.gap" % key] = '+%s/%s' % (gap_string, gap_by_string)
+
+ if query_facets is not None:
+ kwargs['facet'] = 'on'
+ kwargs['facet.query'] = ["%s:%s" % (field, value) for field, value in query_facets]
+
+ if limit_to_registered_models is None:
+ limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+ if models and len(models):
+ model_choices = sorted(get_model_ct(model) for model in models)
+ elif limit_to_registered_models:
+ # Using narrow queries, limit the results to only models handled
+ # with the current routers.
+ model_choices = self.build_models_list()
+ else:
+ model_choices = []
+
+ if len(model_choices) > 0:
+ if narrow_queries is None:
+ narrow_queries = set()
+
+ narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices)))
+
+ if narrow_queries is not None:
+ kwargs['fq'] = list(narrow_queries)
+
+ if stats:
+ kwargs['stats'] = "true"
+
+ for k in stats.keys():
+ kwargs['stats.field'] = k
+
+ for facet in stats[k]:
+ kwargs['f.%s.stats.facet' % k] = facet
+
+ if within is not None:
+ from haystack.utils.geo import generate_bounding_box
+
+ kwargs.setdefault('fq', [])
+ ((min_lat, min_lng), (max_lat, max_lng)) = generate_bounding_box(within['point_1'], within['point_2'])
+ # Bounding boxes are min, min TO max, max. Solr's wiki was *NOT*
+ # very clear on this.
+ bbox = '%s:[%s,%s TO %s,%s]' % (within['field'], min_lat, min_lng, max_lat, max_lng)
+ kwargs['fq'].append(bbox)
+
+ if dwithin is not None:
+ kwargs.setdefault('fq', [])
+ lng, lat = dwithin['point'].get_coords()
+ geofilt = '{!geofilt pt=%s,%s sfield=%s d=%s}' % (lat, lng, dwithin['field'], dwithin['distance'].km)
+ kwargs['fq'].append(geofilt)
+
+ # Check to see if the backend should try to include distances
+ # (Solr 4.X+) in the results.
+ if self.distance_available and distance_point:
+ # In early testing, you can't just hand Solr 4.X a proper bounding box
+ # & request distances. To enable native distance would take calculating
+ # a center point & a radius off the user-provided box, which kinda
+ # sucks. We'll avoid it for now, since Solr 4.x's release will be some
+ # time yet.
+ # kwargs['fl'] += ' _dist_:geodist()'
+ pass
+
+ return kwargs
+
+ def more_like_this(self, model_instance, additional_query_string=None,
+ start_offset=0, end_offset=None, models=None,
+ limit_to_registered_models=None, result_class=None, **kwargs):
+ from haystack import connections
+
+ # Deferred models will have a different class ("RealClass_Deferred_fieldname")
+ # which won't be in our registry:
+ model_klass = model_instance._meta.concrete_model
+
+ index = connections[self.connection_alias].get_unified_index().get_index(model_klass)
+ field_name = index.get_content_field()
+ params = {
+ 'fl': '*,score',
+ }
+
+ if start_offset is not None:
+ params['start'] = start_offset
+
+ if end_offset is not None:
+ params['rows'] = end_offset
+
+ narrow_queries = set()
+
+ if limit_to_registered_models is None:
+ limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+ if models and len(models):
+ model_choices = sorted(get_model_ct(model) for model in models)
+ elif limit_to_registered_models:
+ # Using narrow queries, limit the results to only models handled
+ # with the current routers.
+ model_choices = self.build_models_list()
+ else:
+ model_choices = []
+
+ if len(model_choices) > 0:
+ if narrow_queries is None:
+ narrow_queries = set()
+
+ narrow_queries.add('%s:(%s)' % (DJANGO_CT, ' OR '.join(model_choices)))
+
+ if additional_query_string:
+ narrow_queries.add(additional_query_string)
+
+ if narrow_queries:
+ params['fq'] = list(narrow_queries)
+
+ query = "%s:%s" % (ID, get_identifier(model_instance))
+
+ try:
+ raw_results = self.conn.more_like_this(query, field_name, **params)
+ except (IOError, SolrError) as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to fetch More Like This from Solr for document '%s': %s", query, e)
+ raw_results = EmptyResults()
+
+ return self._process_results(raw_results, result_class=result_class)
+
+ def _process_results(self, raw_results, highlight=False, result_class=None, distance_point=None):
+ from haystack import connections
+ results = []
+ hits = raw_results.hits
+ facets = {}
+ stats = {}
+ spelling_suggestion = None
+
+ if result_class is None:
+ result_class = SearchResult
+
+ if hasattr(raw_results, 'stats'):
+ stats = raw_results.stats.get('stats_fields', {})
+
+ if hasattr(raw_results, 'facets'):
+ facets = {
+ 'fields': raw_results.facets.get('facet_fields', {}),
+ 'dates': raw_results.facets.get('facet_dates', {}),
+ 'queries': raw_results.facets.get('facet_queries', {}),
+ }
+
+ for key in ['fields']:
+ for facet_field in facets[key]:
+ # Convert to a two-tuple, as Solr's json format returns a list of
+ # pairs.
+ facets[key][facet_field] = list(zip(facets[key][facet_field][::2], facets[key][facet_field][1::2]))
+
+ if self.include_spelling is True:
+ if hasattr(raw_results, 'spellcheck'):
+ if len(raw_results.spellcheck.get('suggestions', [])):
+ # For some reason, it's an array of pairs. Pull off the
+ # collated result from the end.
+ spelling_suggestion = raw_results.spellcheck.get('suggestions')[-1]
+
+ unified_index = connections[self.connection_alias].get_unified_index()
+ indexed_models = unified_index.get_indexed_models()
+
+ for raw_result in raw_results.docs:
+ app_label, model_name = raw_result[DJANGO_CT].split('.')
+ additional_fields = {}
+ model = haystack_get_model(app_label, model_name)
+
+ if model and model in indexed_models:
+ index = unified_index.get_index(model)
+ index_field_map = index.field_map
+ for key, value in raw_result.items():
+ string_key = str(key)
+ # re-map key if alternate name used
+ if string_key in index_field_map:
+ string_key = index_field_map[key]
+
+ if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
+ additional_fields[string_key] = index.fields[string_key].convert(value)
+ else:
+ additional_fields[string_key] = self.conn._to_python(value)
+
+ del(additional_fields[DJANGO_CT])
+ del(additional_fields[DJANGO_ID])
+ del(additional_fields['score'])
+
+ if raw_result[ID] in getattr(raw_results, 'highlighting', {}):
+ additional_fields['highlighted'] = raw_results.highlighting[raw_result[ID]]
+
+ if distance_point:
+ additional_fields['_point_of_origin'] = distance_point
+
+ if raw_result.get('__dist__'):
+ from haystack.utils.geo import Distance
+ additional_fields['_distance'] = Distance(km=float(raw_result['__dist__']))
+ else:
+ additional_fields['_distance'] = None
+
+ result = result_class(app_label, model_name, raw_result[DJANGO_ID], raw_result['score'], **additional_fields)
+ results.append(result)
+ else:
+ hits -= 1
+
+ return {
+ 'results': results,
+ 'hits': hits,
+ 'stats': stats,
+ 'facets': facets,
+ 'spelling_suggestion': spelling_suggestion,
+ }
+
+ def build_schema(self, fields):
+ content_field_name = ''
+ schema_fields = []
+
+ for field_name, field_class in fields.items():
+ field_data = {
+ 'field_name': field_class.index_fieldname,
+ 'type': 'text_en',
+ 'indexed': 'true',
+ 'stored': 'true',
+ 'multi_valued': 'false',
+ }
+
+ if field_class.document is True:
+ content_field_name = field_class.index_fieldname
+
+ # DRL_FIXME: Perhaps move to something where, if none of these
+ # checks succeed, call a custom method on the form that
+ # returns, per-backend, the right type of storage?
+ if field_class.field_type in ['date', 'datetime']:
+ field_data['type'] = 'date'
+ elif field_class.field_type == 'integer':
+ field_data['type'] = 'long'
+ elif field_class.field_type == 'float':
+ field_data['type'] = 'float'
+ elif field_class.field_type == 'boolean':
+ field_data['type'] = 'boolean'
+ elif field_class.field_type == 'ngram':
+ field_data['type'] = 'ngram'
+ elif field_class.field_type == 'edge_ngram':
+ field_data['type'] = 'edge_ngram'
+ elif field_class.field_type == 'location':
+ field_data['type'] = 'location'
+
+ if field_class.is_multivalued:
+ field_data['multi_valued'] = 'true'
+
+ if field_class.stored is False:
+ field_data['stored'] = 'false'
+
+ # Do this last to override `text` fields.
+ if field_class.indexed is False:
+ field_data['indexed'] = 'false'
+
+ # If it's text and not being indexed, we probably don't want
+ # to do the normal lowercase/tokenize/stemming/etc. dance.
+ if field_data['type'] == 'text_en':
+ field_data['type'] = 'string'
+
+ # If it's a ``FacetField``, make sure we don't postprocess it.
+ if hasattr(field_class, 'facet_for'):
+ # If it's text, it ought to be a string.
+ if field_data['type'] == 'text_en':
+ field_data['type'] = 'string'
+
+ schema_fields.append(field_data)
+
+ return (content_field_name, schema_fields)
+
+ def extract_file_contents(self, file_obj):
+ """Extract text and metadata from a structured file (PDF, MS Word, etc.)
+
+ Uses the Solr ExtractingRequestHandler, which is based on Apache Tika.
+ See the Solr wiki for details:
+
+ http://wiki.apache.org/solr/ExtractingRequestHandler
+
+ Due to the way the ExtractingRequestHandler is implemented it completely
+ replaces the normal Haystack indexing process with several unfortunate
+ restrictions: only one file per request, the extracted data is added to
+ the index with no ability to modify it, etc. To simplify the process and
+ allow for more advanced use we'll run using the extract-only mode to
+ return the extracted data without adding it to the index so we can then
+ use it within Haystack's normal templating process.
+
+ Returns None if metadata cannot be extracted; otherwise returns a
+ dictionary containing at least two keys:
+
+ :contents:
+ Extracted full-text content, if applicable
+ :metadata:
+ key:value pairs of text strings
+ """
+
+ try:
+ return self.conn.extract(file_obj)
+ except Exception as e:
+ self.log.warning(u"Unable to extract file contents: %s", e,
+ exc_info=True, extra={"data": {"file": file_obj}})
+ return None
+
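A short usage sketch for the extract-only flow described above, assuming the default connection points at this Solr backend and that a ``report.pdf`` file exists (both assumptions):

    from haystack import connections

    backend = connections['default'].get_backend()

    with open('report.pdf', 'rb') as fh:
        extracted = backend.extract_file_contents(fh)

    if extracted is not None:
        body_text = extracted['contents']
        # The available metadata keys depend on what Tika recognises in the file.
        author = extracted['metadata'].get('Author')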
+
+class SolrSearchQuery(BaseSearchQuery):
+ def matching_all_fragment(self):
+ return '*:*'
+
+ def build_query_fragment(self, field, filter_type, value):
+ from haystack import connections
+ query_frag = ''
+
+ if not hasattr(value, 'input_type_name'):
+ # Handle when we've got a ``ValuesListQuerySet``...
+ if hasattr(value, 'values_list'):
+ value = list(value)
+
+ if isinstance(value, six.string_types):
+ # It's not an ``InputType``. Assume ``Clean``.
+ value = Clean(value)
+ else:
+ value = PythonData(value)
+
+ # Prepare the query using the InputType.
+ prepared_value = value.prepare(self)
+
+ if not isinstance(prepared_value, (set, list, tuple)):
+ # Then convert whatever we get back to what pysolr wants if needed.
+ prepared_value = self.backend.conn._from_python(prepared_value)
+
+ # 'content' is a special reserved word, much like 'pk' in
+ # Django's ORM layer. It indicates 'no special field'.
+ if field == 'content':
+ index_fieldname = ''
+ else:
+ index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)
+
+ filter_types = {
+ 'contains': u'%s',
+ 'startswith': u'%s*',
+ 'exact': u'%s',
+ 'gt': u'{%s TO *}',
+ 'gte': u'[%s TO *]',
+ 'lt': u'{* TO %s}',
+ 'lte': u'[* TO %s]',
+ }
+
+ if value.post_process is False:
+ query_frag = prepared_value
+ else:
+ if filter_type in ['contains', 'startswith']:
+ if value.input_type_name == 'exact':
+ query_frag = prepared_value
+ else:
+ # Iterate over terms & incorporate the converted form of each into the query.
+ terms = []
+
+ for possible_value in prepared_value.split(' '):
+ terms.append(filter_types[filter_type] % self.backend.conn._from_python(possible_value))
+
+ if len(terms) == 1:
+ query_frag = terms[0]
+ else:
+ query_frag = u"(%s)" % " AND ".join(terms)
+ elif filter_type == 'in':
+ in_options = []
+
+ for possible_value in prepared_value:
+ in_options.append(u'"%s"' % self.backend.conn._from_python(possible_value))
+
+ query_frag = u"(%s)" % " OR ".join(in_options)
+ elif filter_type == 'range':
+ start = self.backend.conn._from_python(prepared_value[0])
+ end = self.backend.conn._from_python(prepared_value[1])
+ query_frag = u'["%s" TO "%s"]' % (start, end)
+ elif filter_type == 'exact':
+ if value.input_type_name == 'exact':
+ query_frag = prepared_value
+ else:
+ prepared_value = Exact(prepared_value).prepare(self)
+ query_frag = filter_types[filter_type] % prepared_value
+ else:
+ if value.input_type_name != 'exact':
+ prepared_value = Exact(prepared_value).prepare(self)
+
+ query_frag = filter_types[filter_type] % prepared_value
+
+ if len(query_frag) and not isinstance(value, Raw):
+ if not query_frag.startswith('(') and not query_frag.endswith(')'):
+ query_frag = "(%s)" % query_frag
+
+ return u"%s%s" % (index_fieldname, query_frag)
+
+ def build_alt_parser_query(self, parser_name, query_string='', **kwargs):
+ if query_string:
+ query_string = Clean(query_string).prepare(self)
+
+ kwarg_bits = []
+
+ for key in sorted(kwargs.keys()):
+ if isinstance(kwargs[key], six.string_types) and ' ' in kwargs[key]:
+ kwarg_bits.append(u"%s='%s'" % (key, kwargs[key]))
+ else:
+ kwarg_bits.append(u"%s=%s" % (key, kwargs[key]))
+
+ return u'_query_:"{!%s %s}%s"' % (parser_name, Clean(' '.join(kwarg_bits)), query_string)
+
+ def build_params(self, spelling_query=None, **kwargs):
+ search_kwargs = {
+ 'start_offset': self.start_offset,
+ 'result_class': self.result_class
+ }
+ order_by_list = None
+
+ if self.order_by:
+ if order_by_list is None:
+ order_by_list = []
+
+ for order_by in self.order_by:
+ if order_by.startswith('-'):
+ order_by_list.append('%s desc' % order_by[1:])
+ else:
+ order_by_list.append('%s asc' % order_by)
+
+ search_kwargs['sort_by'] = ", ".join(order_by_list)
+
+ if self.date_facets:
+ search_kwargs['date_facets'] = self.date_facets
+
+ if self.distance_point:
+ search_kwargs['distance_point'] = self.distance_point
+
+ if self.dwithin:
+ search_kwargs['dwithin'] = self.dwithin
+
+ if self.end_offset is not None:
+ search_kwargs['end_offset'] = self.end_offset
+
+ if self.facets:
+ search_kwargs['facets'] = self.facets
+
+ if self.fields:
+ search_kwargs['fields'] = self.fields
+
+ if self.highlight:
+ search_kwargs['highlight'] = self.highlight
+
+ if self.models:
+ search_kwargs['models'] = self.models
+
+ if self.narrow_queries:
+ search_kwargs['narrow_queries'] = self.narrow_queries
+
+ if self.query_facets:
+ search_kwargs['query_facets'] = self.query_facets
+
+ if self.within:
+ search_kwargs['within'] = self.within
+
+ if spelling_query:
+ search_kwargs['spelling_query'] = spelling_query
+
+ if self.stats:
+ search_kwargs['stats'] = self.stats
+
+ return search_kwargs
+
+ def run(self, spelling_query=None, **kwargs):
+ """Builds and executes the query. Returns a list of search results."""
+ final_query = self.build_query()
+ search_kwargs = self.build_params(spelling_query, **kwargs)
+
+ if kwargs:
+ search_kwargs.update(kwargs)
+
+ results = self.backend.search(final_query, **search_kwargs)
+ self._results = results.get('results', [])
+ self._hit_count = results.get('hits', 0)
+ self._facet_counts = self.post_process_facets(results)
+ self._stats = results.get('stats', {})
+ self._spelling_suggestion = results.get('spelling_suggestion', None)
+
+ def run_mlt(self, **kwargs):
+ """Builds and executes the query. Returns a list of search results."""
+ if self._more_like_this is False or self._mlt_instance is None:
+ raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.")
+
+ additional_query_string = self.build_query()
+ search_kwargs = {
+ 'start_offset': self.start_offset,
+ 'result_class': self.result_class,
+ 'models': self.models
+ }
+
+ if self.end_offset is not None:
+ search_kwargs['end_offset'] = self.end_offset - self.start_offset
+
+ results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **search_kwargs)
+ self._results = results.get('results', [])
+ self._hit_count = results.get('hits', 0)
+
+
+class SolrEngine(BaseEngine):
+ backend = SolrSearchBackend
+ query = SolrSearchQuery
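For reference, this engine is typically wired up like the following (the URL and timeout are illustrative values, not part of this change):

    HAYSTACK_CONNECTIONS = {
        'default': {
            'ENGINE': 'haystack.backends.solr_backend.SolrEngine',
            'URL': 'http://127.0.0.1:8983/solr',
            # Read by BaseSearchBackend and handed to pysolr as the HTTP timeout.
            'TIMEOUT': 60 * 5,
        },
    }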
diff --git a/haystack/backends/whoosh_backend.py b/haystack/backends/whoosh_backend.py
new file mode 100644
index 0000000..bf26adc
--- /dev/null
+++ b/haystack/backends/whoosh_backend.py
@@ -0,0 +1,916 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import os
+import re
+import shutil
+import threading
+import warnings
+
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+from django.utils import six
+from django.utils.datetime_safe import datetime
+
+from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
+from haystack.constants import DJANGO_CT, DJANGO_ID, ID
+from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument
+from haystack.inputs import Clean, Exact, PythonData, Raw
+from haystack.models import SearchResult
+from haystack.utils import log as logging
+from haystack.utils import get_identifier, get_model_ct
+from haystack.utils.app_loading import haystack_get_model
+
+try:
+ import json
+except ImportError:
+ try:
+ import simplejson as json
+ except ImportError:
+ from django.utils import simplejson as json
+
+try:
+ from django.utils.encoding import force_text
+except ImportError:
+ from django.utils.encoding import force_unicode as force_text
+
+try:
+ import whoosh
+except ImportError:
+ raise MissingDependency("The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.")
+
+# Handle minimum requirement.
+if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
+ raise MissingDependency("The 'whoosh' backend requires version 2.5.0 or greater.")
+
+# Bubble up the correct error.
+from whoosh import index
+from whoosh.analysis import StemmingAnalyzer
+from whoosh.fields import ID as WHOOSH_ID
+from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT
+from whoosh.filedb.filestore import FileStorage, RamStorage
+from whoosh.highlight import highlight as whoosh_highlight
+from whoosh.highlight import ContextFragmenter, HtmlFormatter
+from whoosh.qparser import QueryParser
+from whoosh.searching import ResultsPage
+from whoosh.writing import AsyncWriter
+
+
+DATETIME_REGEX = re.compile(r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
+LOCALS = threading.local()
+LOCALS.RAM_STORE = None
+
+
+class WhooshHtmlFormatter(HtmlFormatter):
+ """
+ An HtmlFormatter that is simpler than whoosh's HtmlFormatter.
+ We use it to keep results consistent across backends; Solr, Xapian,
+ and Elasticsearch use this same formatting.
+ """
+ template = '<%(tag)s>%(t)s</%(tag)s>'
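+ # Illustrative: with the 'em' tag that this backend passes to its
+ # highlighter, a matched token such as "search" renders as '<em>search</em>'.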
+
+
+class WhooshSearchBackend(BaseSearchBackend):
+ # Words reserved by Whoosh for special use.
+ RESERVED_WORDS = (
+ 'AND',
+ 'NOT',
+ 'OR',
+ 'TO',
+ )
+
+ # Characters reserved by Whoosh for special use.
+ # The '\\' must come first, so as not to overwrite the other slash replacements.
+ RESERVED_CHARACTERS = (
+ '\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
+ '[', ']', '^', '"', '~', '*', '?', ':', '.',
+ )
+
+ def __init__(self, connection_alias, **connection_options):
+ super(WhooshSearchBackend, self).__init__(connection_alias, **connection_options)
+ self.setup_complete = False
+ self.use_file_storage = True
+ self.post_limit = connection_options.get('POST_LIMIT', 128 * 1024 * 1024)
+ self.path = connection_options.get('PATH')
+
+ if connection_options.get('STORAGE', 'file') != 'file':
+ self.use_file_storage = False
+
+ if self.use_file_storage and not self.path:
+ raise ImproperlyConfigured("You must specify a 'PATH' in your settings for connection '%s'." % connection_alias)
+
+ self.log = logging.getLogger('haystack')
+
+ def setup(self):
+ """
+ Defers loading until needed.
+ """
+ from haystack import connections
+ new_index = False
+
+ # Make sure the index is there.
+ if self.use_file_storage and not os.path.exists(self.path):
+ os.makedirs(self.path)
+ new_index = True
+
+ if self.use_file_storage and not os.access(self.path, os.W_OK):
+ raise IOError("The path to your Whoosh index '%s' is not writable for the current user/group." % self.path)
+
+ if self.use_file_storage:
+ self.storage = FileStorage(self.path)
+ else:
+ global LOCALS
+
+ if LOCALS.RAM_STORE is None:
+ LOCALS.RAM_STORE = RamStorage()
+
+ self.storage = LOCALS.RAM_STORE
+
+ self.content_field_name, self.schema = self.build_schema(connections[self.connection_alias].get_unified_index().all_searchfields())
+ self.parser = QueryParser(self.content_field_name, schema=self.schema)
+
+ if new_index is True:
+ self.index = self.storage.create_index(self.schema)
+ else:
+ try:
+ self.index = self.storage.open_index(schema=self.schema)
+ except index.EmptyIndexError:
+ self.index = self.storage.create_index(self.schema)
+
+ self.setup_complete = True
+
+ def build_schema(self, fields):
+ schema_fields = {
+ ID: WHOOSH_ID(stored=True, unique=True),
+ DJANGO_CT: WHOOSH_ID(stored=True),
+ DJANGO_ID: WHOOSH_ID(stored=True),
+ }
+ # Grab the number of keys that are hard-coded into Haystack.
+ # We'll use this to (possibly) fail slightly more gracefully later.
+ initial_key_count = len(schema_fields)
+ content_field_name = ''
+
+ for field_name, field_class in fields.items():
+ if field_class.is_multivalued:
+ if field_class.indexed is False:
+ schema_fields[field_class.index_fieldname] = IDLIST(stored=True, field_boost=field_class.boost)
+ else:
+ schema_fields[field_class.index_fieldname] = KEYWORD(stored=True, commas=True, scorable=True, field_boost=field_class.boost)
+ elif field_class.field_type in ['date', 'datetime']:
+ schema_fields[field_class.index_fieldname] = DATETIME(stored=field_class.stored, sortable=True)
+ elif field_class.field_type == 'integer':
+ schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=int, field_boost=field_class.boost)
+ elif field_class.field_type == 'float':
+ schema_fields[field_class.index_fieldname] = NUMERIC(stored=field_class.stored, numtype=float, field_boost=field_class.boost)
+ elif field_class.field_type == 'boolean':
+ # Field boost isn't supported on BOOLEAN as of 1.8.2.
+ schema_fields[field_class.index_fieldname] = BOOLEAN(stored=field_class.stored)
+ elif field_class.field_type == 'ngram':
+ schema_fields[field_class.index_fieldname] = NGRAM(minsize=3, maxsize=15, stored=field_class.stored, field_boost=field_class.boost)
+ elif field_class.field_type == 'edge_ngram':
+ schema_fields[field_class.index_fieldname] = NGRAMWORDS(minsize=2, maxsize=15, at='start', stored=field_class.stored, field_boost=field_class.boost)
+ else:
+ schema_fields[field_class.index_fieldname] = TEXT(stored=True, analyzer=StemmingAnalyzer(), field_boost=field_class.boost, sortable=True)
+
+ if field_class.document is True:
+ content_field_name = field_class.index_fieldname
+ schema_fields[field_class.index_fieldname].spelling = True
+
+ # Fail more gracefully than relying on the backend to die if no fields
+ # are found.
+ if len(schema_fields) <= initial_key_count:
+ raise SearchBackendError("No fields were found in any search_indexes. Please correct this before attempting to search.")
+
+ return (content_field_name, Schema(**schema_fields))
+
+ def update(self, index, iterable, commit=True):
+ if not self.setup_complete:
+ self.setup()
+
+ self.index = self.index.refresh()
+ writer = AsyncWriter(self.index)
+
+ for obj in iterable:
+ try:
+ doc = index.full_prepare(obj)
+ except SkipDocument:
+ self.log.debug(u"Indexing for object `%s` skipped", obj)
+ else:
+ # Really make sure it's unicode, because Whoosh won't have it any
+ # other way.
+ for key in doc:
+ doc[key] = self._from_python(doc[key])
+
+ # Document boosts aren't supported in Whoosh 2.5.0+.
+ if 'boost' in doc:
+ del doc['boost']
+
+ try:
+ writer.update_document(**doc)
+ except Exception as e:
+ if not self.silently_fail:
+ raise
+
+ # We'll log the object identifier but won't include the actual object
+ # to avoid the possibility of that generating encoding errors while
+ # processing the log message:
+ self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={
+ "data": {
+ "index": index,
+ "object": get_identifier(obj)
+ }
+ })
+
+ if len(iterable) > 0:
+ # For now, commit no matter what, as we run into locking issues otherwise.
+ writer.commit()
+
+ def remove(self, obj_or_string, commit=True):
+ if not self.setup_complete:
+ self.setup()
+
+ self.index = self.index.refresh()
+ whoosh_id = get_identifier(obj_or_string)
+
+ try:
+ self.index.delete_by_query(q=self.parser.parse(u'%s:"%s"' % (ID, whoosh_id)))
+ except Exception as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e)
+
+ def clear(self, models=[], commit=True):
+ if not self.setup_complete:
+ self.setup()
+
+ self.index = self.index.refresh()
+
+ try:
+ if not models:
+ self.delete_index()
+ else:
+ models_to_delete = []
+
+ for model in models:
+ models_to_delete.append(u"%s:%s" % (DJANGO_CT, get_model_ct(model)))
+
+ self.index.delete_by_query(q=self.parser.parse(u" OR ".join(models_to_delete)))
+ except Exception as e:
+ if not self.silently_fail:
+ raise
+
+ self.log.error("Failed to clear documents from Whoosh: %s", e)
+
+ def delete_index(self):
+ # Per the Whoosh mailing list, if wiping out everything from the index,
+ # it's much more efficient to simply delete the index files.
+ if self.use_file_storage and os.path.exists(self.path):
+ shutil.rmtree(self.path)
+ elif not self.use_file_storage:
+ self.storage.clean()
+
+ # Recreate everything.
+ self.setup()
+
+ def optimize(self):
+ if not self.setup_complete:
+ self.setup()
+
+ self.index = self.index.refresh()
+ self.index.optimize()
+
+ def calculate_page(self, start_offset=0, end_offset=None):
+ # Guard against Whoosh throwing an error; it requires an end_offset
+ # greater than 0.
+ if end_offset is not None and end_offset <= 0:
+ end_offset = 1
+
+ # Determine the page.
+ page_num = 0
+
+ if end_offset is None:
+ end_offset = 1000000
+
+ if start_offset is None:
+ start_offset = 0
+
+ page_length = end_offset - start_offset
+
+ if page_length and page_length > 0:
+ page_num = int(start_offset / page_length)
+
+ # Increment because Whoosh uses 1-based page numbers.
+ page_num += 1
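+ # Illustrative: start_offset=20, end_offset=30 gives page_length=10 and
+ # page_num=int(20 / 10) + 1 = 3.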
+ return page_num, page_length
+
+ @log_query
+ def search(self, query_string, sort_by=None, start_offset=0, end_offset=None,
+ fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
+ narrow_queries=None, spelling_query=None, within=None,
+ dwithin=None, distance_point=None, models=None,
+ limit_to_registered_models=None, result_class=None, **kwargs):
+ if not self.setup_complete:
+ self.setup()
+
+ # A zero length query should return no results.
+ if len(query_string) == 0:
+ return {
+ 'results': [],
+ 'hits': 0,
+ }
+
+ query_string = force_text(query_string)
+
+ # A one-character query (non-wildcard) gets nabbed by a stopwords
+ # filter and should yield zero results.
+ if len(query_string) <= 1 and query_string != u'*':
+ return {
+ 'results': [],
+ 'hits': 0,
+ }
+
+ reverse = False
+
+ if sort_by is not None:
+ # Determine if we need to reverse the results and if Whoosh can
+ # handle what it's being asked to sort by. Reversing is an
+ # all-or-nothing action, unfortunately.
+ sort_by_list = []
+ reverse_counter = 0
+
+ for order_by in sort_by:
+ if order_by.startswith('-'):
+ reverse_counter += 1
+
+ if reverse_counter and reverse_counter != len(sort_by):
+ raise SearchBackendError("Whoosh requires all order_by fields"
+ " to use the same sort direction")
+
+ for order_by in sort_by:
+ if order_by.startswith('-'):
+ sort_by_list.append(order_by[1:])
+
+ if len(sort_by_list) == 1:
+ reverse = True
+ else:
+ sort_by_list.append(order_by)
+
+ if len(sort_by_list) == 1:
+ reverse = False
+
+ sort_by = sort_by_list[0]
+
+ if facets is not None:
+ warnings.warn("Whoosh does not handle faceting.", Warning, stacklevel=2)
+
+ if date_facets is not None:
+ warnings.warn("Whoosh does not handle date faceting.", Warning, stacklevel=2)
+
+ if query_facets is not None:
+ warnings.warn("Whoosh does not handle query faceting.", Warning, stacklevel=2)
+
+ narrowed_results = None
+ self.index = self.index.refresh()
+
+ if limit_to_registered_models is None:
+ limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+ if models and len(models):
+ model_choices = sorted(get_model_ct(model) for model in models)
+ elif limit_to_registered_models:
+ # Using narrow queries, limit the results to only models handled
+ # with the current routers.
+ model_choices = self.build_models_list()
+ else:
+ model_choices = []
+
+ if len(model_choices) > 0:
+ if narrow_queries is None:
+ narrow_queries = set()
+
+ narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
+
+ narrow_searcher = None
+
+ if narrow_queries is not None:
+ # Potentially expensive? I don't see another way to do it in Whoosh...
+ narrow_searcher = self.index.searcher()
+
+ for nq in narrow_queries:
+ recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)),
+ limit=None)
+
+ if len(recent_narrowed_results) <= 0:
+ return {
+ 'results': [],
+ 'hits': 0,
+ }
+
+ if narrowed_results:
+ narrowed_results.filter(recent_narrowed_results)
+ else:
+ narrowed_results = recent_narrowed_results
+
+ self.index = self.index.refresh()
+
+ if self.index.doc_count():
+ searcher = self.index.searcher()
+ parsed_query = self.parser.parse(query_string)
+
+ # In the event of an invalid/stopworded query, recover gracefully.
+ if parsed_query is None:
+ return {
+ 'results': [],
+ 'hits': 0,
+ }
+
+ page_num, page_length = self.calculate_page(start_offset, end_offset)
+
+ search_kwargs = {
+ 'pagelen': page_length,
+ 'sortedby': sort_by,
+ 'reverse': reverse,
+ }
+
+ # Handle the case where the results have been narrowed.
+ if narrowed_results is not None:
+ search_kwargs['filter'] = narrowed_results
+
+ try:
+ raw_page = searcher.search_page(
+ parsed_query,
+ page_num,
+ **search_kwargs
+ )
+ except ValueError:
+ if not self.silently_fail:
+ raise
+
+ return {
+ 'results': [],
+ 'hits': 0,
+ 'spelling_suggestion': None,
+ }
+
+ # As of Whoosh 2.5.1, it will return the wrong page of results
+ # if you request a page number that is too high. :(
+ if raw_page.pagenum < page_num:
+ return {
+ 'results': [],
+ 'hits': 0,
+ 'spelling_suggestion': None,
+ }
+
+ results = self._process_results(raw_page, highlight=highlight, query_string=query_string, spelling_query=spelling_query, result_class=result_class)
+ searcher.close()
+
+ if hasattr(narrow_searcher, 'close'):
+ narrow_searcher.close()
+
+ return results
+ else:
+ if self.include_spelling:
+ if spelling_query:
+ spelling_suggestion = self.create_spelling_suggestion(spelling_query)
+ else:
+ spelling_suggestion = self.create_spelling_suggestion(query_string)
+ else:
+ spelling_suggestion = None
+
+ return {
+ 'results': [],
+ 'hits': 0,
+ 'spelling_suggestion': spelling_suggestion,
+ }
+
+ def more_like_this(self, model_instance, additional_query_string=None,
+ start_offset=0, end_offset=None, models=None,
+ limit_to_registered_models=None, result_class=None, **kwargs):
+ if not self.setup_complete:
+ self.setup()
+
+ # Deferred models will have a different class ("RealClass_Deferred_fieldname")
+ # which won't be in our registry:
+ model_klass = model_instance._meta.concrete_model
+
+ field_name = self.content_field_name
+ narrow_queries = set()
+ narrowed_results = None
+ self.index = self.index.refresh()
+
+ if limit_to_registered_models is None:
+ limit_to_registered_models = getattr(settings, 'HAYSTACK_LIMIT_TO_REGISTERED_MODELS', True)
+
+ if models and len(models):
+ model_choices = sorted(get_model_ct(model) for model in models)
+ elif limit_to_registered_models:
+ # Using narrow queries, limit the results to only models handled
+ # with the current routers.
+ model_choices = self.build_models_list()
+ else:
+ model_choices = []
+
+ if len(model_choices) > 0:
+ if narrow_queries is None:
+ narrow_queries = set()
+
+ narrow_queries.add(' OR '.join(['%s:%s' % (DJANGO_CT, rm) for rm in model_choices]))
+
+ if additional_query_string and additional_query_string != '*':
+ narrow_queries.add(additional_query_string)
+
+ narrow_searcher = None
+
+ if narrow_queries is not None:
+ # Potentially expensive? I don't see another way to do it in Whoosh...
+ narrow_searcher = self.index.searcher()
+
+ for nq in narrow_queries:
+ recent_narrowed_results = narrow_searcher.search(self.parser.parse(force_text(nq)),
+ limit=None)
+
+ if len(recent_narrowed_results) <= 0:
+ return {
+ 'results': [],
+ 'hits': 0,
+ }
+
+ if narrowed_results:
+ narrowed_results.filter(recent_narrowed_results)
+ else:
+ narrowed_results = recent_narrowed_results
+
+ page_num, page_length = self.calculate_page(start_offset, end_offset)
+
+ self.index = self.index.refresh()
+ raw_results = EmptyResults()
+
+ if self.index.doc_count():
+ query = "%s:%s" % (ID, get_identifier(model_instance))
+ searcher = self.index.searcher()
+ parsed_query = self.parser.parse(query)
+ results = searcher.search(parsed_query)
+
+ if len(results):
+ raw_results = results[0].more_like_this(field_name, top=end_offset)
+
+ # Handle the case where the results have been narrowed.
+ if narrowed_results is not None and hasattr(raw_results, 'filter'):
+ raw_results.filter(narrowed_results)
+
+ try:
+ raw_page = ResultsPage(raw_results, page_num, page_length)
+ except ValueError:
+ if not self.silently_fail:
+ raise
+
+ return {
+ 'results': [],
+ 'hits': 0,
+ 'spelling_suggestion': None,
+ }
+
+ # As of Whoosh 2.5.1, it will return the wrong page of results
+ # if you request a page number that is too high. :(
+ if raw_page.pagenum < page_num:
+ return {
+ 'results': [],
+ 'hits': 0,
+ 'spelling_suggestion': None,
+ }
+
+ results = self._process_results(raw_page, result_class=result_class)
+ searcher.close()
+
+ if hasattr(narrow_searcher, 'close'):
+ narrow_searcher.close()
+
+ return results
+
+ def _process_results(self, raw_page, highlight=False, query_string='', spelling_query=None, result_class=None):
+ from haystack import connections
+ results = []
+
+ # It's important to grab the hits first before slicing. Otherwise, this
+ # can cause pagination failures.
+ hits = len(raw_page)
+
+ if result_class is None:
+ result_class = SearchResult
+
+ facets = {}
+ spelling_suggestion = None
+ unified_index = connections[self.connection_alias].get_unified_index()
+ indexed_models = unified_index.get_indexed_models()
+
+ for doc_offset, raw_result in enumerate(raw_page):
+ score = raw_page.score(doc_offset) or 0
+ app_label, model_name = raw_result[DJANGO_CT].split('.')
+ additional_fields = {}
+ model = haystack_get_model(app_label, model_name)
+
+ if model and model in indexed_models:
+ for key, value in raw_result.items():
+ index = unified_index.get_index(model)
+ string_key = str(key)
+
+ if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
+ # Special-cased due to the nature of KEYWORD fields.
+ if index.fields[string_key].is_multivalued:
+ if value is None or len(value) == 0:
+ additional_fields[string_key] = []
+ else:
+ additional_fields[string_key] = value.split(',')
+ else:
+ additional_fields[string_key] = index.fields[string_key].convert(value)
+ else:
+ additional_fields[string_key] = self._to_python(value)
+
+ del(additional_fields[DJANGO_CT])
+ del(additional_fields[DJANGO_ID])
+
+ if highlight:
+ sa = StemmingAnalyzer()
+ formatter = WhooshHtmlFormatter('em')
+ terms = [token.text for token in sa(query_string)]
+
+ whoosh_result = whoosh_highlight(
+ additional_fields.get(self.content_field_name),
+ terms,
+ sa,
+ ContextFragmenter(),
+ formatter
+ )
+ additional_fields['highlighted'] = {
+ self.content_field_name: [whoosh_result],
+ }
+
+ result = result_class(app_label, model_name, raw_result[DJANGO_ID], score, **additional_fields)
+ results.append(result)
+ else:
+ hits -= 1
+
+ if self.include_spelling:
+ if spelling_query:
+ spelling_suggestion = self.create_spelling_suggestion(spelling_query)
+ else:
+ spelling_suggestion = self.create_spelling_suggestion(query_string)
+
+ return {
+ 'results': results,
+ 'hits': hits,
+ 'facets': facets,
+ 'spelling_suggestion': spelling_suggestion,
+ }
+
+ def create_spelling_suggestion(self, query_string):
+ spelling_suggestion = None
+ reader = self.index.reader()
+ corrector = reader.corrector(self.content_field_name)
+ cleaned_query = force_text(query_string)
+
+ if not query_string:
+ return spelling_suggestion
+
+ # Clean the string.
+ for rev_word in self.RESERVED_WORDS:
+ cleaned_query = cleaned_query.replace(rev_word, '')
+
+ for rev_char in self.RESERVED_CHARACTERS:
+ cleaned_query = cleaned_query.replace(rev_char, '')
+
+ # Break it down.
+ query_words = cleaned_query.split()
+ suggested_words = []
+
+ for word in query_words:
+ suggestions = corrector.suggest(word, limit=1)
+
+ if len(suggestions) > 0:
+ suggested_words.append(suggestions[0])
+
+ spelling_suggestion = ' '.join(suggested_words)
+ return spelling_suggestion
+
+ def _from_python(self, value):
+ """
+ Converts Python values to a string for Whoosh.
+
+ Code courtesy of pysolr.
+ """
+ if hasattr(value, 'strftime'):
+ if not hasattr(value, 'hour'):
+ value = datetime(value.year, value.month, value.day, 0, 0, 0)
+ elif isinstance(value, bool):
+ if value:
+ value = 'true'
+ else:
+ value = 'false'
+ elif isinstance(value, (list, tuple)):
+ value = u','.join([force_text(v) for v in value])
+ elif isinstance(value, (six.integer_types, float)):
+ # Leave it alone.
+ pass
+ else:
+ value = force_text(value)
+ return value
+
+ def _to_python(self, value):
+ """
+ Converts values from Whoosh to native Python values.
+
+ A port of the same method in pysolr, as they deal with data the same way.
+ """
+ if value == 'true':
+ return True
+ elif value == 'false':
+ return False
+
+ if value and isinstance(value, six.string_types):
+ possible_datetime = DATETIME_REGEX.search(value)
+
+ if possible_datetime:
+ date_values = possible_datetime.groupdict()
+
+ for dk, dv in date_values.items():
+ date_values[dk] = int(dv)
+
+ return datetime(date_values['year'], date_values['month'], date_values['day'], date_values['hour'], date_values['minute'], date_values['second'])
+
+ try:
+ # Attempt to use json to load the values.
+ converted_value = json.loads(value)
+
+ # Try to handle most built-in types.
+ if isinstance(converted_value, (list, tuple, set, dict, six.integer_types, float, complex)):
+ return converted_value
+ except:
+ # If it fails (SyntaxError or its ilk) or we don't trust it,
+ # continue on.
+ pass
+
+ return value
+
+
+class WhooshSearchQuery(BaseSearchQuery):
+ def _convert_datetime(self, date):
+ if hasattr(date, 'hour'):
+ return force_text(date.strftime('%Y%m%d%H%M%S'))
+ else:
+ return force_text(date.strftime('%Y%m%d000000'))
+
+ def clean(self, query_fragment):
+ """
+ Provides a mechanism for sanitizing user input before presenting the
+ value to the backend.
+
+ Whoosh 1.X differs here in that you can no longer use a backslash
+ to escape reserved characters. Instead, the whole word should be
+ quoted.
+ """
+ words = query_fragment.split()
+ cleaned_words = []
+
+ for word in words:
+ if word in self.backend.RESERVED_WORDS:
+ word = word.lower()
+
+ for char in self.backend.RESERVED_CHARACTERS:
+ if char in word:
+ word = "'%s'" % word
+ break
+
+ cleaned_words.append(word)
+
+ return ' '.join(cleaned_words)
+
+ def build_query_fragment(self, field, filter_type, value):
+ from haystack import connections
+ query_frag = ''
+ is_datetime = False
+
+ if not hasattr(value, 'input_type_name'):
+ # Handle when we've got a ``ValuesListQuerySet``...
+ if hasattr(value, 'values_list'):
+ value = list(value)
+
+ if hasattr(value, 'strftime'):
+ is_datetime = True
+
+ if isinstance(value, six.string_types) and value != ' ':
+ # It's not an ``InputType``. Assume ``Clean``.
+ value = Clean(value)
+ else:
+ value = PythonData(value)
+
+ # Prepare the query using the InputType.
+ prepared_value = value.prepare(self)
+
+ if not isinstance(prepared_value, (set, list, tuple)):
+ # Then convert whatever we get back to what pysolr wants if needed.
+ prepared_value = self.backend._from_python(prepared_value)
+
+ # 'content' is a special reserved word, much like 'pk' in
+ # Django's ORM layer. It indicates 'no special field'.
+ if field == 'content':
+ index_fieldname = ''
+ else:
+ index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)
+
+ filter_types = {
+ 'contains': '%s',
+ 'startswith': "%s*",
+ 'exact': '%s',
+ 'gt': "{%s to}",
+ 'gte': "[%s to]",
+ 'lt': "{to %s}",
+ 'lte': "[to %s]",
+ }
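+ # Illustrative (assuming a 'title' field indexed under the same name):
+ # build_query_fragment('title', 'startswith', 'hay') yields u'title:(hay*)'.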
+
+ if value.post_process is False:
+ query_frag = prepared_value
+ else:
+ if filter_type in ['contains', 'startswith']:
+ if value.input_type_name == 'exact':
+ query_frag = prepared_value
+ else:
+ # Iterate over terms & incorporate the converted form of each into the query.
+ terms = []
+
+ if isinstance(prepared_value, six.string_types):
+ possible_values = prepared_value.split(' ')
+ else:
+ if is_datetime is True:
+ prepared_value = self._convert_datetime(prepared_value)
+
+ possible_values = [prepared_value]
+
+ for possible_value in possible_values:
+ terms.append(filter_types[filter_type] % self.backend._from_python(possible_value))
+
+ if len(terms) == 1:
+ query_frag = terms[0]
+ else:
+ query_frag = u"(%s)" % " AND ".join(terms)
+ elif filter_type == 'in':
+ in_options = []
+
+ for possible_value in prepared_value:
+ is_datetime = False
+
+ if hasattr(possible_value, 'strftime'):
+ is_datetime = True
+
+ pv = self.backend._from_python(possible_value)
+
+ if is_datetime is True:
+ pv = self._convert_datetime(pv)
+
+ if isinstance(pv, six.string_types) and not is_datetime:
+ in_options.append('"%s"' % pv)
+ else:
+ in_options.append('%s' % pv)
+
+ query_frag = "(%s)" % " OR ".join(in_options)
+ elif filter_type == 'range':
+ start = self.backend._from_python(prepared_value[0])
+ end = self.backend._from_python(prepared_value[1])
+
+ if hasattr(prepared_value[0], 'strftime'):
+ start = self._convert_datetime(start)
+
+ if hasattr(prepared_value[1], 'strftime'):
+ end = self._convert_datetime(end)
+
+ query_frag = u"[%s to %s]" % (start, end)
+ elif filter_type == 'exact':
+ if value.input_type_name == 'exact':
+ query_frag = prepared_value
+ else:
+ prepared_value = Exact(prepared_value).prepare(self)
+ query_frag = filter_types[filter_type] % prepared_value
+ else:
+ if is_datetime is True:
+ prepared_value = self._convert_datetime(prepared_value)
+
+ query_frag = filter_types[filter_type] % prepared_value
+
+ if len(query_frag) and not isinstance(value, Raw):
+ if not query_frag.startswith('(') and not query_frag.endswith(')'):
+ query_frag = "(%s)" % query_frag
+
+ return u"%s%s" % (index_fieldname, query_frag)
+
+
+ # if not filter_type in ('in', 'range'):
+ # # 'in' is a bit of a special case, as we don't want to
+ # # convert a valid list/tuple to string. Defer handling it
+ # # until later...
+ # value = self.backend._from_python(value)
+
+
+class WhooshEngine(BaseEngine):
+ backend = WhooshSearchBackend
+ query = WhooshSearchQuery
diff --git a/haystack/constants.py b/haystack/constants.py
new file mode 100644
index 0000000..202280c
--- /dev/null
+++ b/haystack/constants.py
@@ -0,0 +1,33 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django.conf import settings
+
+DEFAULT_ALIAS = 'default'
+
+# Reserved field names
+ID = getattr(settings, 'HAYSTACK_ID_FIELD', 'id')
+DJANGO_CT = getattr(settings, 'HAYSTACK_DJANGO_CT_FIELD', 'django_ct')
+DJANGO_ID = getattr(settings, 'HAYSTACK_DJANGO_ID_FIELD', 'django_id')
+
+# Default operator. Valid options are AND/OR.
+DEFAULT_OPERATOR = getattr(settings, 'HAYSTACK_DEFAULT_OPERATOR', 'AND')
+
+# Valid expression extensions.
+VALID_FILTERS = set(['contains', 'exact', 'gt', 'gte', 'lt', 'lte', 'in', 'startswith', 'range'])
+FILTER_SEPARATOR = '__'
+
+# The maximum number of items to display in a SearchQuerySet.__repr__
+REPR_OUTPUT_SIZE = 20
+
+# Number of SearchResults to load at a time.
+ITERATOR_LOAD_PER_QUERY = getattr(settings, 'HAYSTACK_ITERATOR_LOAD_PER_QUERY', 10)
+
+# A marker class in the hierarchy to indicate that it handles search data.
+class Indexable(object):
+ haystack_use_for_indexing = True
+
+# For the geo bits, since that's what Solr & Elasticsearch seem to silently
+# assume...
+WGS_84_SRID = 4326
diff --git a/haystack/exceptions.py b/haystack/exceptions.py
new file mode 100644
index 0000000..305bd11
--- /dev/null
+++ b/haystack/exceptions.py
@@ -0,0 +1,53 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+
+class HaystackError(Exception):
+ """A generic exception for all others to extend."""
+ pass
+
+
+class SearchBackendError(HaystackError):
+ """Raised when a backend can not be found."""
+ pass
+
+
+class SearchFieldError(HaystackError):
+ """Raised when a field encounters an error."""
+ pass
+
+
+class MissingDependency(HaystackError):
+ """Raised when a library a backend depends on can not be found."""
+ pass
+
+
+class NotHandled(HaystackError):
+ """Raised when a model is not handled by the router setup."""
+ pass
+
+
+class MoreLikeThisError(HaystackError):
+ """Raised when a model instance has not been provided for More Like This."""
+ pass
+
+
+class FacetingError(HaystackError):
+ """Raised when incorrect arguments have been provided for faceting."""
+ pass
+
+
+class SpatialError(HaystackError):
+ """Raised when incorrect arguments have been provided for spatial."""
+ pass
+
+
+class StatsError(HaystackError):
+ "Raised when incorrect arguments have been provided for stats"
+ pass
+
+
+class SkipDocument(HaystackError):
+ """Raised when a document should be skipped while updating"""
+ pass
diff --git a/haystack/fields.py b/haystack/fields.py
new file mode 100644
index 0000000..5fd769d
--- /dev/null
+++ b/haystack/fields.py
@@ -0,0 +1,441 @@
+# encoding: utf-8
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import re
+
+from django.template import Context, loader
+from django.utils import datetime_safe, six
+
+from haystack.exceptions import SearchFieldError
+from haystack.utils import get_model_ct_tuple
+
+
+class NOT_PROVIDED:
+ pass
+
+
+DATETIME_REGEX = re.compile(r'^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})(T|\s+)(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2}).*?$')
+
+
+# All the SearchFields variants.
+
+class SearchField(object):
+ """The base implementation of a search field."""
+ field_type = None
+
+ def __init__(self, model_attr=None, use_template=False, template_name=None,
+ document=False, indexed=True, stored=True, faceted=False,
+ default=NOT_PROVIDED, null=False, index_fieldname=None,
+ facet_class=None, boost=1.0, weight=None):
+ # Track what the index thinks this field is called.
+ self.instance_name = None
+ self.model_attr = model_attr
+ self.use_template = use_template
+ self.template_name = template_name
+ self.document = document
+ self.indexed = indexed
+ self.stored = stored
+ self.faceted = faceted
+ self._default = default
+ self.null = null
+ self.index_fieldname = index_fieldname
+ self.boost = weight or boost
+ self.is_multivalued = False
+
+ # We supply the facet_class for making it easy to create a faceted
+ # field based off of this field.
+ self.facet_class = facet_class
+
+ if self.facet_class is None:
+ self.facet_class = FacetCharField
+
+ self.set_instance_name(None)
+
+ def set_instance_name(self, instance_name):
+ self.instance_name = instance_name
+
+ if self.index_fieldname is None:
+ self.index_fieldname = self.instance_name
+
+ def has_default(self):
+ """Returns a boolean of whether this field has a default value."""
+ return self._default is not NOT_PROVIDED
+
+ @property
+ def default(self):
+ """Returns the default value for the field."""
+ if callable(self._default):
+ return self._default()
+
+ return self._default
+
+ def prepare(self, obj):
+ """
+ Takes data from the provided object and prepares it for storage in the
+ index.
+ """
+ # Give priority to a template.
+ if self.use_template:
+ return self.prepare_template(obj)
+ elif self.model_attr is not None:
+ # Check for `__` in the field for looking through the relation.
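+ # e.g. a hypothetical model_attr='author__name' resolves obj.author.name.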
+ attrs = self.model_attr.split('__')
+ current_object = obj
+
+ for attr in attrs:
+ if not hasattr(current_object, attr):
+ raise SearchFieldError("The model '%s' does not have a model_attr '%s'." % (repr(current_object), attr))
+
+ current_object = getattr(current_object, attr, None)
+
+ if current_object is None:
+ if self.has_default():
+ current_object = self._default
+ # Fall out of the loop, since any further attribute
+ # access would fail.
+ break
+ elif self.null:
+ current_object = None
+ # Fall out of the loop, since any further attribute
+ # access would fail.
+ break
+ else:
+ raise SearchFieldError("The model '%s' combined with model_attr '%s' returned None, but doesn't allow a default or null value." % (repr(obj), self.model_attr))
+
+ if callable(current_object):
+ return current_object()
+
+ return current_object
+
+ if self.has_default():
+ return self.default
+ else:
+ return None
+
+ def prepare_template(self, obj):
+ """
+ Flattens an object for indexing.
+
+ This loads a template
+ (``search/indexes/{app_label}/{model_name}_{field_name}.txt``) and
+ returns the result of rendering that template. ``object`` will be in
+ its context.
+ """
+ if self.instance_name is None and self.template_name is None:
+ raise SearchFieldError("This field requires either its instance_name variable to be populated or an explicit template_name in order to load the correct template.")
+
+ if self.template_name is not None:
+ template_names = self.template_name
+
+ if not isinstance(template_names, (list, tuple)):
+ template_names = [template_names]
+ else:
+ app_label, model_name = get_model_ct_tuple(obj)
+ template_names = ['search/indexes/%s/%s_%s.txt' % (app_label, model_name, self.instance_name)]
+
+ t = loader.select_template(template_names)
+ return t.render(Context({'object': obj}))
+
+ def convert(self, value):
+ """
+ Handles conversion between the data found and the type of the field.
+
+ Extending classes should override this method and provide correct
+ data coercion.
+ """
+ return value
+
+
+class CharField(SearchField):
+ field_type = 'string'
+
+ def __init__(self, **kwargs):
+ if kwargs.get('facet_class') is None:
+ kwargs['facet_class'] = FacetCharField
+
+ super(CharField, self).__init__(**kwargs)
+
+ def prepare(self, obj):
+ return self.convert(super(CharField, self).prepare(obj))
+
+ def convert(self, value):
+ if value is None:
+ return None
+
+ return six.text_type(value)
+
+
+class LocationField(SearchField):
+ field_type = 'location'
+
+ def prepare(self, obj):
+ from haystack.utils.geo import ensure_point
+
+ value = super(LocationField, self).prepare(obj)
+
+ if value is None:
+ return None
+
+ pnt = ensure_point(value)
+ pnt_lng, pnt_lat = pnt.get_coords()
+ return "%s,%s" % (pnt_lat, pnt_lng)
+
+ def convert(self, value):
+ from haystack.utils.geo import ensure_point, Point
+
+ if value is None:
+ return None
+
+ if hasattr(value, 'geom_type'):
+ value = ensure_point(value)
+ return value
+
+ if isinstance(value, six.string_types):
+ lat, lng = value.split(',')
+ elif isinstance(value, (list, tuple)):
+ # GeoJSON-alike
+ lat, lng = value[1], value[0]
+ elif isinstance(value, dict):
+ lat = value.get('lat', 0)
+ lng = value.get('lon', 0)
+
+ value = Point(float(lng), float(lat))
+ return value
+
+
+class NgramField(CharField):
+ field_type = 'ngram'
+
+ def __init__(self, **kwargs):
+ if kwargs.get('faceted') is True:
+ raise SearchFieldError("%s can not be faceted." % self.__class__.__name__)
+
+ super(NgramField, self).__init__(**kwargs)
+
+
+class EdgeNgramField(NgramField):
+ field_type = 'edge_ngram'
+
+
+class IntegerField(SearchField):
+ field_type = 'integer'
+
+ def __init__(self, **kwargs):
+ if kwargs.get('facet_class') is None:
+ kwargs['facet_class'] = FacetIntegerField
+
+ super(IntegerField, self).__init__(**kwargs)
+
+ def prepare(self, obj):
+ return self.convert(super(IntegerField, self).prepare(obj))
+
+ def convert(self, value):
+ if value is None:
+ return None
+
+ return int(value)
+
+
+class FloatField(SearchField):
+ field_type = 'float'
+
+ def __init__(self, **kwargs):
+ if kwargs.get('facet_class') is None:
+ kwargs['facet_class'] = FacetFloatField
+
+ super(FloatField, self).__init__(**kwargs)
+
+ def prepare(self, obj):
+ return self.convert(super(FloatField, self).prepare(obj))
+
+ def convert(self, value):
+ if value is None:
+ return None
+
+ return float(value)
+
+
+class DecimalField(SearchField):
+ field_type = 'string'
+
+ def __init__(self, **kwargs):
+ if kwargs.get('facet_class') is None:
+ kwargs['facet_class'] = FacetDecimalField
+
+ super(DecimalField, self).__init__(**kwargs)
+
+ def prepare(self, obj):
+ return self.convert(super(DecimalField, self).prepare(obj))
+
+ def convert(self, value):
+ if value is None:
+ return None
+
+ return six.text_type(value)
+
+
+class BooleanField(SearchField):
+ field_type = 'boolean'
+
+ def __init__(self, **kwargs):
+ if kwargs.get('facet_class') is None:
+ kwargs['facet_class'] = FacetBooleanField
+
+ super(BooleanField, self).__init__(**kwargs)
+
+ def prepare(self, obj):
+ return self.convert(super(BooleanField, self).prepare(obj))
+
+ def convert(self, value):
+ if value is None:
+ return None
+
+ return bool(value)
+
+
+class DateField(SearchField):
+ field_type = 'date'
+
+ def __init__(self, **kwargs):
+ if kwargs.get('facet_class') is None:
+ kwargs['facet_class'] = FacetDateField
+
+ super(DateField, self).__init__(**kwargs)
+
+ def convert(self, value):
+ if value is None:
+ return None
+
+ if isinstance(value, six.string_types):
+ match = DATETIME_REGEX.search(value)
+
+ if match:
+ data = match.groupdict()
+ return datetime_safe.date(int(data['year']), int(data['month']), int(data['day']))
+ else:
+ raise SearchFieldError("Date provided to '%s' field doesn't appear to be a valid date string: '%s'" % (self.instance_name, value))
+
+ return value
+
+
+class DateTimeField(SearchField):
+ field_type = 'datetime'
+
+ def __init__(self, **kwargs):
+ if kwargs.get('facet_class') is None:
+ kwargs['facet_class'] = FacetDateTimeField
+
+ super(DateTimeField, self).__init__(**kwargs)
+
+ def convert(self, value):
+ if value is None:
+ return None
+
+ if isinstance(value, six.string_types):
+ match = DATETIME_REGEX.search(value)
+
+ if match:
+ data = match.groupdict()
+ return datetime_safe.datetime(int(data['year']), int(data['month']), int(data['day']), int(data['hour']), int(data['minute']), int(data['second']))
+ else:
+ raise SearchFieldError("Datetime provided to '%s' field doesn't appear to be a valid datetime string: '%s'" % (self.instance_name, value))
+
+ return value
+
+
+class MultiValueField(SearchField):
+ field_type = 'string'
+
+ def __init__(self, **kwargs):
+ if kwargs.get('facet_class') is None:
+ kwargs['facet_class'] = FacetMultiValueField
+
+ if kwargs.get('use_template') is True:
+ raise SearchFieldError("'%s' fields can not use templates to prepare their data." % self.__class__.__name__)
+
+ super(MultiValueField, self).__init__(**kwargs)
+ self.is_multivalued = True
+
+ def prepare(self, obj):
+ return self.convert(super(MultiValueField, self).prepare(obj))
+
+ def convert(self, value):
+ if value is None:
+ return None
+
+ return list(value)
+
+
+class FacetField(SearchField):
+ """
+ ``FacetField`` is slightly different from the other fields because it can
+ work in conjunction with other fields as its data source.
+
+ Accepts an optional ``facet_for`` kwarg, which should be the field name
+ (not ``index_fieldname``) of the field it should pull data from.
+ """
+ instance_name = None
+
+ def __init__(self, **kwargs):
+ handled_kwargs = self.handle_facet_parameters(kwargs)
+ super(FacetField, self).__init__(**handled_kwargs)
+
+ def handle_facet_parameters(self, kwargs):
+ if kwargs.get('faceted', False):
+ raise SearchFieldError("FacetField (%s) does not accept the 'faceted' argument." % self.instance_name)
+
+ if not kwargs.get('null', True):
+ raise SearchFieldError("FacetField (%s) does not accept False for the 'null' argument." % self.instance_name)
+
+ if not kwargs.get('indexed', True):
+ raise SearchFieldError("FacetField (%s) does not accept False for the 'indexed' argument." % self.instance_name)
+
+ if kwargs.get('facet_class'):
+ raise SearchFieldError("FacetField (%s) does not accept the 'facet_class' argument." % self.instance_name)
+
+ self.facet_for = None
+ self.facet_class = None
+
+ # Make sure the field is nullable.
+ kwargs['null'] = True
+
+ if 'facet_for' in kwargs:
+ self.facet_for = kwargs['facet_for']
+ del(kwargs['facet_for'])
+
+ return kwargs
+
+ def get_facet_for_name(self):
+ return self.facet_for or self.instance_name
+
+
+class FacetCharField(FacetField, CharField):
+ pass
+
+
+class FacetIntegerField(FacetField, IntegerField):
+ pass
+
+
+class FacetFloatField(FacetField, FloatField):
+ pass
+
+
+class FacetDecimalField(FacetField, DecimalField):
+ pass
+
+
+class FacetBooleanField(FacetField, BooleanField):
+ pass
+
+
+class FacetDateField(FacetField, DateField):
+ pass
+
+
+class FacetDateTimeField(FacetField, DateTimeField):
+ pass
+
+
+class FacetMultiValueField(FacetField, MultiValueField):
+ pass
diff --git a/haystack/forms.py b/haystack/forms.py
new file mode 100644
index 0000000..c68f91b
--- /dev/null
+++ b/haystack/forms.py
@@ -0,0 +1,133 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django import forms
+from django.db import models
+from django.utils.text import capfirst
+from django.utils.translation import ugettext_lazy as _
+
+from haystack import connections
+from haystack.constants import DEFAULT_ALIAS
+from haystack.query import EmptySearchQuerySet, SearchQuerySet
+from haystack.utils import get_model_ct
+
+try:
+ from django.utils.encoding import smart_text
+except ImportError:
+ from django.utils.encoding import smart_unicode as smart_text
+
+
+def model_choices(using=DEFAULT_ALIAS):
+ choices = [(get_model_ct(m), capfirst(smart_text(m._meta.verbose_name_plural)))
+ for m in connections[using].get_unified_index().get_indexed_models()]
+ return sorted(choices, key=lambda x: x[1])
+
+
+class SearchForm(forms.Form):
+ q = forms.CharField(required=False, label=_('Search'),
+ widget=forms.TextInput(attrs={'type': 'search'}))
+
+ def __init__(self, *args, **kwargs):
+ self.searchqueryset = kwargs.pop('searchqueryset', None)
+ self.load_all = kwargs.pop('load_all', False)
+
+ if self.searchqueryset is None:
+ self.searchqueryset = SearchQuerySet()
+
+ super(SearchForm, self).__init__(*args, **kwargs)
+
+ def no_query_found(self):
+ """
+ Determines the behavior when no query was found.
+
+ By default, no results are returned (``EmptySearchQuerySet``).
+
+ Should you want to show all results, override this method in your
+ own ``SearchForm`` subclass and do ``return self.searchqueryset.all()``.
+ """
+ return EmptySearchQuerySet()
+
+ def search(self):
+ if not self.is_valid():
+ return self.no_query_found()
+
+ if not self.cleaned_data.get('q'):
+ return self.no_query_found()
+
+ sqs = self.searchqueryset.auto_query(self.cleaned_data['q'])
+
+ if self.load_all:
+ sqs = sqs.load_all()
+
+ return sqs
+
+ def get_suggestion(self):
+ if not self.is_valid():
+ return None
+
+ return self.searchqueryset.spelling_suggestion(self.cleaned_data['q'])
+
+
+class HighlightedSearchForm(SearchForm):
+ def search(self):
+ return super(HighlightedSearchForm, self).search().highlight()
+
+
+class FacetedSearchForm(SearchForm):
+ def __init__(self, *args, **kwargs):
+ self.selected_facets = kwargs.pop("selected_facets", [])
+ super(FacetedSearchForm, self).__init__(*args, **kwargs)
+
+ def search(self):
+ sqs = super(FacetedSearchForm, self).search()
+
+ # We need to process each facet to ensure that the field name and the
+ # value are quoted correctly and separately:
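+ # Illustrative: a selected facet of 'author_exact:Daniel' narrows the
+ # results with sqs.narrow(u'author_exact:"Daniel"').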
+ for facet in self.selected_facets:
+ if ":" not in facet:
+ continue
+
+ field, value = facet.split(":", 1)
+
+ if value:
+ sqs = sqs.narrow(u'%s:"%s"' % (field, sqs.query.clean(value)))
+
+ return sqs
+
+
+class ModelSearchForm(SearchForm):
+ def __init__(self, *args, **kwargs):
+ super(ModelSearchForm, self).__init__(*args, **kwargs)
+ self.fields['models'] = forms.MultipleChoiceField(choices=model_choices(), required=False, label=_('Search In'), widget=forms.CheckboxSelectMultiple)
+
+ def get_models(self):
+ """Return an alphabetical list of model classes in the index."""
+ search_models = []
+
+ if self.is_valid():
+ for model in self.cleaned_data['models']:
+ search_models.append(models.get_model(*model.split('.')))
+
+ return search_models
+
+ def search(self):
+ sqs = super(ModelSearchForm, self).search()
+ return sqs.models(*self.get_models())
+
+
+class HighlightedModelSearchForm(ModelSearchForm):
+ def search(self):
+ return super(HighlightedModelSearchForm, self).search().highlight()
+
+
+class FacetedModelSearchForm(ModelSearchForm):
+ selected_facets = forms.CharField(required=False, widget=forms.HiddenInput)
+
+ def search(self):
+ sqs = super(FacetedModelSearchForm, self).search()
+
+ if hasattr(self, 'cleaned_data') and self.cleaned_data['selected_facets']:
+ sqs = sqs.narrow(self.cleaned_data['selected_facets'])
+
+ return sqs.models(*self.get_models())
diff --git a/haystack/generic_views.py b/haystack/generic_views.py
new file mode 100644
index 0000000..2dad515
--- /dev/null
+++ b/haystack/generic_views.py
@@ -0,0 +1,126 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django.conf import settings
+from django.core.paginator import Paginator
+from django.views.generic import FormView
+from django.views.generic.edit import FormMixin
+from django.views.generic.list import MultipleObjectMixin
+
+from .forms import FacetedSearchForm, ModelSearchForm
+from .query import SearchQuerySet
+
+RESULTS_PER_PAGE = getattr(settings, 'HAYSTACK_SEARCH_RESULTS_PER_PAGE', 20)
+
+
+class SearchMixin(MultipleObjectMixin, FormMixin):
+ """
+ A mixin that adds Haystack search functionality to
+ another view class.
+
+ This mixin exhibits similar end functionality as the base Haystack search
+ view, but with some important distinctions oriented around greater
+ compatibility with Django's built-in class based views and mixins.
+
+ Normal flow:
+
+ self.request = request
+
+ self.form = self.build_form()
+ self.query = self.get_query()
+ self.results = self.get_results()
+
+ return self.create_response()
+
+ This mixin should:
+
+ 1. Make the form
+ 2. Get the queryset
+ 3. Return the paginated queryset
+
+ """
+ template_name = 'search/search.html'
+ load_all = True
+ form_class = ModelSearchForm
+ queryset = SearchQuerySet()
+ context_object_name = None
+ paginate_by = RESULTS_PER_PAGE
+ paginate_orphans = 0
+ paginator_class = Paginator
+ page_kwarg = 'page'
+ form_name = 'form'
+ search_field = 'q'
+ object_list = None
+
+ def get_form_kwargs(self):
+ """
+ Returns the keyword arguments for instantiating the form.
+ """
+ kwargs = {'initial': self.get_initial()}
+ if self.request.method == 'GET':
+ kwargs.update({
+ 'data': self.request.GET,
+ })
+ kwargs.update({'searchqueryset': self.get_queryset()})
+ return kwargs
+
+ def form_invalid(self, form):
+ context = self.get_context_data(**{
+ self.form_name: form,
+ 'object_list': self.get_queryset()
+ })
+ return self.render_to_response(context)
+
+ def form_valid(self, form):
+ self.queryset = form.search()
+ context = self.get_context_data(**{
+ self.form_name: form,
+ 'query': form.cleaned_data.get(self.search_field),
+ 'object_list': self.queryset
+ })
+ return self.render_to_response(context)
+
+
+class FacetedSearchMixin(SearchMixin):
+ """
+ A mixin that adds Haystack search functionality with support for
+ faceting.
+ """
+ form_class = FacetedSearchForm
+
+ def get_form_kwargs(self):
+ kwargs = super(FacetedSearchMixin, self).get_form_kwargs()
+ kwargs.update({
+ 'selected_facets': self.request.GET.getlist("selected_facets")
+ })
+ return kwargs
+
+ def get_context_data(self, **kwargs):
+ context = super(FacetedSearchMixin, self).get_context_data(**kwargs)
+ context.update({'facets': self.queryset.facet_counts()})
+ return context
+
+
+class SearchView(SearchMixin, FormView):
+ """A view class for searching a Haystack managed search index"""
+
+ def get(self, request, *args, **kwargs):
+ """
+ Handles GET requests and instantiates a blank version of the form.
+ """
+ form_class = self.get_form_class()
+ form = self.get_form(form_class)
+
+ if form.is_valid():
+ return self.form_valid(form)
+ else:
+ return self.form_invalid(form)
+
+
+class FacetedSearchView(FacetedSearchMixin, SearchView):
+ """
+ A view class for searching a Haystack managed search index with
+ facets
+ """
+ pass
diff --git a/haystack/indexes.py b/haystack/indexes.py
new file mode 100644
index 0000000..d3d53ec
--- /dev/null
+++ b/haystack/indexes.py
@@ -0,0 +1,497 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import copy
+import threading
+import warnings
+
+from django.core.exceptions import ImproperlyConfigured
+from django.utils.six import with_metaclass
+
+from haystack import connection_router, connections
+from haystack.constants import DEFAULT_ALIAS, DJANGO_CT, DJANGO_ID, ID, Indexable
+from haystack.fields import *
+from haystack.manager import SearchIndexManager
+from haystack.utils import get_facet_field_name, get_identifier, get_model_ct
+
+try:
+ from django.utils.encoding import force_text
+except ImportError:
+ from django.utils.encoding import force_unicode as force_text
+
+
+class DeclarativeMetaclass(type):
+ def __new__(cls, name, bases, attrs):
+ attrs['fields'] = {}
+
+ # Inherit any fields from parent(s).
+ try:
+ parents = [b for b in bases if issubclass(b, SearchIndex)]
+ # Simulate the MRO.
+ parents.reverse()
+
+ for p in parents:
+ fields = getattr(p, 'fields', None)
+
+ if fields:
+ attrs['fields'].update(fields)
+ except NameError:
+ pass
+
+ # Build a dictionary of faceted fields for cross-referencing.
+ facet_fields = {}
+
+ for field_name, obj in attrs.items():
+ # Only need to check the FacetFields.
+ if hasattr(obj, 'facet_for'):
+ if obj.facet_for not in facet_fields:
+ facet_fields[obj.facet_for] = []
+
+ facet_fields[obj.facet_for].append(field_name)
+
+ built_fields = {}
+
+ for field_name, obj in attrs.items():
+ if isinstance(obj, SearchField):
+ field = attrs[field_name]
+ field.set_instance_name(field_name)
+ built_fields[field_name] = field
+
+ # Only check non-faceted fields for the following info.
+ if not hasattr(field, 'facet_for'):
+ if field.faceted is True:
+ # If no other field is claiming this field as
+ # ``facet_for``, create a shadow ``FacetField``.
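+ # Illustrative: a CharField named 'author' with faceted=True gets a
+ # shadow facet field named 'author_exact' by default.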
+ if field_name not in facet_fields:
+ shadow_facet_name = get_facet_field_name(field_name)
+ shadow_facet_field = field.facet_class(facet_for=field_name)
+ shadow_facet_field.set_instance_name(shadow_facet_name)
+ built_fields[shadow_facet_name] = shadow_facet_field
+
+ attrs['fields'].update(built_fields)
+
+ # Assigning default 'objects' query manager if it does not already exist
+ if 'objects' not in attrs:
+ try:
+ attrs['objects'] = SearchIndexManager(attrs['Meta'].index_label)
+ except (KeyError, AttributeError):
+ attrs['objects'] = SearchIndexManager(DEFAULT_ALIAS)
+
+ return super(DeclarativeMetaclass, cls).__new__(cls, name, bases, attrs)
+
+
+class SearchIndex(with_metaclass(DeclarativeMetaclass, threading.local)):
+ """
+ Base class for building indexes.
+
+ An example might look like this::
+
+ import datetime
+ from haystack import indexes
+ from myapp.models import Note
+
+ class NoteIndex(indexes.SearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ author = indexes.CharField(model_attr='user')
+ pub_date = indexes.DateTimeField(model_attr='pub_date')
+
+ def get_model(self):
+ return Note
+
+ def index_queryset(self, using=None):
+ return self.get_model().objects.filter(pub_date__lte=datetime.datetime.now())
+
+ """
+ def __init__(self):
+ self.prepared_data = None
+ content_fields = []
+
+ self.field_map = dict()
+ for field_name, field in self.fields.items():
+ # Map each index fieldname back to its field name.
+ self.field_map[field.index_fieldname] = field_name
+ if field.document is True:
+ content_fields.append(field_name)
+
+ if len(content_fields) != 1:
+ raise SearchFieldError("The index '%s' must have one (and only one) SearchField with document=True." % self.__class__.__name__)
+
+ def get_model(self):
+ """
+ Should return the ``Model`` class (not an instance) that the rest of the
+ ``SearchIndex`` should use.
+
+ This method is required & you must override it to return the correct class.
+ """
+ raise NotImplementedError("You must provide a 'model' method for the '%r' index." % self)
+
+ def index_queryset(self, using=None):
+ """
+ Get the default QuerySet to index when doing a full update.
+
+ Subclasses can override this method to avoid indexing certain objects.
+ """
+ return self.get_model()._default_manager.all()
+
+ def read_queryset(self, using=None):
+ """
+ Get the default QuerySet for read actions.
+
+ Subclasses can override this method to work with other managers.
+ Useful when working with default managers that filter some objects.
+ """
+ return self.index_queryset(using=using)
+
+ def build_queryset(self, using=None, start_date=None, end_date=None):
+ """
+ Get the default QuerySet to index when doing an index update.
+
+ Subclasses can override this method to take into account related
+ model modification times.
+
+ The default is to use ``SearchIndex.index_queryset`` and filter
+ based on ``SearchIndex.get_updated_field``
+ """
+ extra_lookup_kwargs = {}
+ model = self.get_model()
+ updated_field = self.get_updated_field()
+
+ update_field_msg = ("No updated date field found for '%s' "
+ "- not restricting by age.") % model.__name__
+
+ if start_date:
+ if updated_field:
+ extra_lookup_kwargs['%s__gte' % updated_field] = start_date
+ else:
+ warnings.warn(update_field_msg)
+
+ if end_date:
+ if updated_field:
+ extra_lookup_kwargs['%s__lte' % updated_field] = end_date
+ else:
+ warnings.warn(update_field_msg)
+
+ index_qs = None
+
+ if hasattr(self, 'get_queryset'):
+ warnings.warn("'SearchIndex.get_queryset' was deprecated in Haystack v2. Please rename the method 'index_queryset'.")
+ index_qs = self.get_queryset()
+ else:
+ index_qs = self.index_queryset(using=using)
+
+ if not hasattr(index_qs, 'filter'):
+ raise ImproperlyConfigured("The '%r' class must return a 'QuerySet' in the 'index_queryset' method." % self)
+
+ # `.select_related()` seems like a good idea here but can fail on
+ # nullable `ForeignKey` as well as what seems like other cases.
+ return index_qs.filter(**extra_lookup_kwargs).order_by(model._meta.pk.name)
+
+ def prepare(self, obj):
+ """
+ Fetches and adds/alters data before indexing.
+ """
+ self.prepared_data = {
+ ID: get_identifier(obj),
+ DJANGO_CT: get_model_ct(obj),
+ DJANGO_ID: force_text(obj.pk),
+ }
+
+ for field_name, field in self.fields.items():
+ # Use the possibly overridden name, which will default to the
+ # variable name of the field.
+ self.prepared_data[field.index_fieldname] = field.prepare(obj)
+
+ if hasattr(self, "prepare_%s" % field_name):
+ value = getattr(self, "prepare_%s" % field_name)(obj)
+ self.prepared_data[field.index_fieldname] = value
+
+ return self.prepared_data
+
+ def full_prepare(self, obj):
+ self.prepared_data = self.prepare(obj)
+
+ for field_name, field in self.fields.items():
+ # Duplicate data for faceted fields.
+ if getattr(field, 'facet_for', None):
+ source_field_name = self.fields[field.facet_for].index_fieldname
+
+ # If there's data there, leave it alone. Otherwise, populate it
+ # with whatever the related field has.
+ if self.prepared_data[field_name] is None and source_field_name in self.prepared_data:
+ self.prepared_data[field.index_fieldname] = self.prepared_data[source_field_name]
+
+ # Remove any fields that lack a value and are ``null=True``.
+ if field.null is True:
+ if self.prepared_data[field.index_fieldname] is None:
+ del(self.prepared_data[field.index_fieldname])
+
+ return self.prepared_data
+
+ def get_content_field(self):
+ """Returns the field that supplies the primary document to be indexed."""
+ for field_name, field in self.fields.items():
+ if field.document is True:
+ return field.index_fieldname
+
+ def get_field_weights(self):
+ """Returns a dict of fields with weight values"""
+ weights = {}
+ for field_name, field in self.fields.items():
+ if field.boost:
+ weights[field_name] = field.boost
+ return weights
+
+ def _get_backend(self, using):
+ if using is None:
+ try:
+ using = connection_router.for_write(index=self)[0]
+ except IndexError:
+ # There's no backend to handle it. Bomb out.
+ return None
+
+ return connections[using].get_backend()
+
+ def update(self, using=None):
+ """
+ Updates the entire index.
+
+ If ``using`` is provided, it specifies which connection should be
+ used. Default relies on the routers to decide which backend should
+ be used.
+ """
+ backend = self._get_backend(using)
+
+ if backend is not None:
+ backend.update(self, self.index_queryset(using=using))
+
+ def update_object(self, instance, using=None, **kwargs):
+ """
+ Update the index for a single object. Attached to the class's
+ post-save hook.
+
+ If ``using`` is provided, it specifies which connection should be
+ used. Default relies on the routers to decide which backend should
+ be used.
+ """
+ # Check to make sure we want to index this first.
+ if self.should_update(instance, **kwargs):
+ backend = self._get_backend(using)
+
+ if backend is not None:
+ backend.update(self, [instance])
+
+ def remove_object(self, instance, using=None, **kwargs):
+ """
+ Remove an object from the index. Attached to the class's
+ post-delete hook.
+
+ If ``using`` is provided, it specifies which connection should be
+ used. Default relies on the routers to decide which backend should
+ be used.
+ """
+ backend = self._get_backend(using)
+
+ if backend is not None:
+ backend.remove(instance, **kwargs)
+
+ def clear(self, using=None):
+ """
+ Clears the entire index.
+
+ If ``using`` is provided, it specifies which connection should be
+ used. Default relies on the routers to decide which backend should
+ be used.
+ """
+ backend = self._get_backend(using)
+
+ if backend is not None:
+ backend.clear(models=[self.get_model()])
+
+ def reindex(self, using=None):
+ """
+ Completely clear the index for this model and rebuild it.
+
+ If ``using`` is provided, it specifies which connection should be
+ used. Default relies on the routers to decide which backend should
+ be used.
+ """
+ self.clear(using=using)
+ self.update(using=using)
+
+ def get_updated_field(self):
+ """
+ Get the field name that represents the updated date for the model.
+
+ If specified, this is used by the ``update_index`` management command to
+ filter the QuerySet by date, enabling you to reindex only recently
+ changed records. This method should either return None (always reindex
+ everything) or the name of a DateField/DateTimeField on the model as a
+ string.
+ """
+ return None
+
+ def should_update(self, instance, **kwargs):
+ """
+ Determine if an object should be updated in the index.
+
+ It's useful to override this when an object may save frequently and
+ cause excessive reindexing. You should check conditions on the instance
+ and return False if it is not to be indexed.
+
+ By default, returns True (always reindex).
+ """
+ return True
+
+ def load_all_queryset(self):
+ """
+ Provides the ability to override how objects get loaded in conjunction
+ with ``SearchQuerySet.load_all``.
+
+ This is useful for post-processing the results from the query, enabling
+ things like adding ``select_related`` or filtering certain data.
+
+ By default, returns ``all()`` on the model's default manager.
+ """
+ return self.get_model()._default_manager.all()
+
+
+class BasicSearchIndex(SearchIndex):
+ text = CharField(document=True, use_template=True)
+
+
+# End SearchIndexes
+# Begin ModelSearchIndexes
+
+
+def index_field_from_django_field(f, default=CharField):
+ """
+ Returns the Haystack field type that would likely be associated with each
+ Django type.
+ """
+ result = default
+
+ if f.get_internal_type() in ('DateField', 'DateTimeField'):
+ result = DateTimeField
+ elif f.get_internal_type() in ('BooleanField', 'NullBooleanField'):
+ result = BooleanField
+ elif f.get_internal_type() in ('CommaSeparatedIntegerField',):
+ result = MultiValueField
+ elif f.get_internal_type() in ('DecimalField', 'FloatField'):
+ result = FloatField
+ elif f.get_internal_type() in ('IntegerField', 'PositiveIntegerField', 'PositiveSmallIntegerField', 'SmallIntegerField'):
+ result = IntegerField
+
+ return result
+
+
+class ModelSearchIndex(SearchIndex):
+ """
+ Introspects the model assigned to it and generates a `SearchIndex` based on
+ the fields of that model.
+
+ In addition, it adds a `text` field that is the `document=True` field and
+ has the `use_template=True` option set, just like the `BasicSearchIndex`.
+
+ Usage of this class might result in inferior `SearchIndex` objects, which
+ can directly affect your search results. Use this to establish basic
+ functionality and move to custom `SearchIndex` objects for better control.
+
+ At this time, it does not handle related fields.
+ """
+ text = CharField(document=True, use_template=True)
+ # list of reserved field names
+ fields_to_skip = (ID, DJANGO_CT, DJANGO_ID, 'content', 'text')
+
+ def __init__(self, extra_field_kwargs=None):
+ self.model = None
+
+ self.prepared_data = None
+ content_fields = []
+ self.extra_field_kwargs = extra_field_kwargs or {}
+
+ # Introspect the model, adding/removing fields as needed.
+ # Adds/Excludes should happen only if the fields are not already
+ # defined in `self.fields`.
+ self._meta = getattr(self, 'Meta', None)
+
+ if self._meta:
+ self.model = getattr(self._meta, 'model', None)
+ fields = getattr(self._meta, 'fields', [])
+ excludes = getattr(self._meta, 'excludes', [])
+
+ # Add in the new fields.
+ self.fields.update(self.get_fields(fields, excludes))
+
+ for field_name, field in self.fields.items():
+ if field.document is True:
+ content_fields.append(field_name)
+
+ if not len(content_fields) == 1:
+ raise SearchFieldError("The index '%s' must have one (and only one) SearchField with document=True." % self.__class__.__name__)
+
+ def should_skip_field(self, field):
+ """
+ Given a Django model field, return True if it should be skipped and
+ thus excluded from the contributed SearchFields.
+ """
+ # Skip fields in skip list
+ if field.name in self.fields_to_skip:
+ return True
+
+ # Ignore certain fields (AutoField, related fields).
+ if field.primary_key or getattr(field, 'rel'):
+ return True
+
+ return False
+
+ def get_model(self):
+ return self.model
+
+ def get_index_fieldname(self, f):
+ """
+ Given a Django field, return the appropriate index fieldname.
+ """
+ return f.name
+
+ def get_fields(self, fields=None, excludes=None):
+ """
+ Given any explicit fields to include and fields to exclude, add
+ additional fields based on the associated model.
+ """
+ final_fields = {}
+ fields = fields or []
+ excludes = excludes or []
+
+ for f in self.model._meta.fields:
+ # If the field name is already present, skip
+ if f.name in self.fields:
+ continue
+
+ # If field is not present in explicit field listing, skip
+ if fields and f.name not in fields:
+ continue
+
+ # If field is in exclude list, skip
+ if excludes and f.name in excludes:
+ continue
+
+ if self.should_skip_field(f):
+ continue
+
+ index_field_class = index_field_from_django_field(f)
+
+ kwargs = copy.copy(self.extra_field_kwargs)
+ kwargs.update({
+ 'model_attr': f.name,
+ })
+
+ if f.null is True:
+ kwargs['null'] = True
+
+ if f.has_default():
+ kwargs['default'] = f.default
+
+ final_fields[f.name] = index_field_class(**kwargs)
+ final_fields[f.name].set_instance_name(self.get_index_fieldname(f))
+
+ return final_fields
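For orientation, a minimal sketch of how the ``SearchIndex`` API above is typically subclassed. The ``Note`` model, its ``author``/``pub_date`` fields, and the template path are invented for illustration and are not part of this patch:

    # Hypothetical example; ``Note`` and its fields are not part of this package.
    from django.utils.timezone import now
    from haystack import indexes
    from myapp.models import Note  # assumed example model

    class NoteIndex(indexes.SearchIndex, indexes.Indexable):
        # The one required document=True field, rendered from a template
        # (e.g. search/indexes/myapp/note_text.txt -- path assumed).
        text = indexes.CharField(document=True, use_template=True)
        author = indexes.CharField(model_attr='author')
        pub_date = indexes.DateTimeField(model_attr='pub_date')

        def get_model(self):
            return Note

        def index_queryset(self, using=None):
            # Only index notes that are already published.
            return self.get_model().objects.filter(pub_date__lte=now())

        def get_updated_field(self):
            # Lets build_queryset() (and ``update_index --age``) restrict by date.
            return 'pub_date'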
diff --git a/haystack/inputs.py b/haystack/inputs.py
new file mode 100644
index 0000000..ef0a929
--- /dev/null
+++ b/haystack/inputs.py
@@ -0,0 +1,159 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import re
+import warnings
+
+from django.utils.encoding import python_2_unicode_compatible
+
+try:
+ from django.utils.encoding import force_text
+except ImportError:
+ from django.utils.encoding import force_unicode as force_text
+
+
+@python_2_unicode_compatible
+class BaseInput(object):
+ """
+ The base input type. Doesn't do much. You want ``Raw`` instead.
+ """
+ input_type_name = 'base'
+ post_process = True
+
+ def __init__(self, query_string, **kwargs):
+ self.query_string = query_string
+ self.kwargs = kwargs
+
+ def __repr__(self):
+ return u"<%s '%s'>" % (self.__class__.__name__, self.__unicode__().encode('utf8'))
+
+ def __str__(self):
+ return force_text(self.query_string)
+
+ def prepare(self, query_obj):
+ return self.query_string
+
+
+class Raw(BaseInput):
+ """
+ An input type for passing a query directly to the backend.
+
+ Prone to not being very portable.
+ """
+ input_type_name = 'raw'
+ post_process = False
+
+
+class PythonData(BaseInput):
+ """
+ Represents a bare Python non-string type.
+
+ Largely only for internal use.
+ """
+ input_type_name = 'python_data'
+
+
+class Clean(BaseInput):
+ """
+ An input type for sanitizing user/untrusted input.
+ """
+ input_type_name = 'clean'
+
+ def prepare(self, query_obj):
+ query_string = super(Clean, self).prepare(query_obj)
+ return query_obj.clean(query_string)
+
+
+class Exact(BaseInput):
+ """
+ An input type for making exact matches.
+ """
+ input_type_name = 'exact'
+
+ def prepare(self, query_obj):
+ query_string = super(Exact, self).prepare(query_obj)
+
+ if self.kwargs.get('clean', False):
+ # We need to clean each part of the exact match.
+ exact_bits = [Clean(bit).prepare(query_obj) for bit in query_string.split(' ') if bit]
+ query_string = u' '.join(exact_bits)
+
+ return query_obj.build_exact_query(query_string)
+
+
+class Not(Clean):
+ """
+ An input type for negating a query.
+ """
+ input_type_name = 'not'
+
+ def prepare(self, query_obj):
+ query_string = super(Not, self).prepare(query_obj)
+ return query_obj.build_not_query(query_string)
+
+
+class AutoQuery(BaseInput):
+ """
+ A convenience class that handles common user queries.
+
+ In addition to cleaning all tokens, it handles double quote bits as
+ exact matches & terms with '-' in front as NOT queries.
+ """
+ input_type_name = 'auto_query'
+ post_process = False
+ exact_match_re = re.compile(r'"(?P<phrase>.*?)"')
+
+ def prepare(self, query_obj):
+ query_string = super(AutoQuery, self).prepare(query_obj)
+ exacts = self.exact_match_re.findall(query_string)
+ tokens = []
+ query_bits = []
+
+ for rough_token in self.exact_match_re.split(query_string):
+ if not rough_token:
+ continue
+ elif not rough_token in exacts:
+ # We have something that's not an exact match but may have more
+ # than one word in it.
+ tokens.extend(rough_token.split(' '))
+ else:
+ tokens.append(rough_token)
+
+ for token in tokens:
+ if not token:
+ continue
+ if token in exacts:
+ query_bits.append(Exact(token, clean=True).prepare(query_obj))
+ elif token.startswith('-') and len(token) > 1:
+ # This might break Xapian. Check on this.
+ query_bits.append(Not(token[1:]).prepare(query_obj))
+ else:
+ query_bits.append(Clean(token).prepare(query_obj))
+
+ return u' '.join(query_bits)
+
+
+class AltParser(BaseInput):
+ """
+ If the engine supports it, this input type allows for submitting a query
+ that uses a different parser.
+ """
+ input_type_name = 'alt_parser'
+ post_process = False
+ use_parens = False
+
+ def __init__(self, parser_name, query_string='', **kwargs):
+ self.parser_name = parser_name
+ self.query_string = query_string
+ self.kwargs = kwargs
+
+ def __repr__(self):
+ return u"<%s '%s' '%s' '%s'>" % (self.__class__.__name__, self.parser_name, self.query_string, self.kwargs)
+
+ def prepare(self, query_obj):
+ if not hasattr(query_obj, 'build_alt_parser_query'):
+ warnings.warn("Use of 'AltParser' input type is being ignored, as the '%s' backend doesn't support them." % query_obj)
+ return ''
+
+ return query_obj.build_alt_parser_query(self.parser_name, self.query_string, **self.kwargs)
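A brief sketch of how these input types are meant to be combined with ``SearchQuerySet`` filters; the ``author`` field and the query strings are assumptions made for illustration:

    from haystack.inputs import AutoQuery, Clean, Exact, Raw
    from haystack.query import SearchQuerySet

    sqs = SearchQuerySet()

    # AutoQuery: quoted phrases become exact matches, '-term' becomes a NOT query.
    results = sqs.filter(content=AutoQuery('"staff meeting" -cancelled'))

    # Clean: sanitize untrusted user input before it reaches the backend.
    user_input = 'free-form <user> query'
    results = sqs.filter(content=Clean(user_input))

    # Exact and Raw: exact phrase matching and backend-specific raw syntax.
    results = sqs.filter(author=Exact('jane doe'))   # 'author' field is assumed
    results = sqs.filter(content=Raw('django*'))     # not portable across backends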
diff --git a/haystack/management/__init__.py b/haystack/management/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/haystack/management/__init__.py
diff --git a/haystack/management/commands/__init__.py b/haystack/management/commands/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/haystack/management/commands/__init__.py
diff --git a/haystack/management/commands/build_solr_schema.py b/haystack/management/commands/build_solr_schema.py
new file mode 100644
index 0000000..6465e37
--- /dev/null
+++ b/haystack/management/commands/build_solr_schema.py
@@ -0,0 +1,70 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import sys
+from optparse import make_option
+
+from django.core.exceptions import ImproperlyConfigured
+from django.core.management.base import BaseCommand
+from django.template import Context, loader
+
+from haystack import constants
+from haystack.backends.solr_backend import SolrSearchBackend
+
+
+class Command(BaseCommand):
+ help = "Generates a Solr schema that reflects the indexes."
+ base_options = (
+ make_option("-f", "--filename", action="store", type="string", dest="filename",
+ help='If provided, directs output to a file instead of stdout.'),
+ make_option("-u", "--using", action="store", type="string", dest="using", default=constants.DEFAULT_ALIAS,
+ help='If provided, chooses a connection to work with.'),
+ )
+ option_list = BaseCommand.option_list + base_options
+
+ def handle(self, **options):
+ """Generates a Solr schema that reflects the indexes."""
+ using = options.get('using')
+ schema_xml = self.build_template(using=using)
+
+ if options.get('filename'):
+ self.write_file(options.get('filename'), schema_xml)
+ else:
+ self.print_stdout(schema_xml)
+
+ def build_context(self, using):
+ from haystack import connections, connection_router
+ backend = connections[using].get_backend()
+
+ if not isinstance(backend, SolrSearchBackend):
+ raise ImproperlyConfigured("'%s' isn't configured as a SolrEngine)." % backend.connection_alias)
+
+ content_field_name, fields = backend.build_schema(connections[using].get_unified_index().all_searchfields())
+ return Context({
+ 'content_field_name': content_field_name,
+ 'fields': fields,
+ 'default_operator': constants.DEFAULT_OPERATOR,
+ 'ID': constants.ID,
+ 'DJANGO_CT': constants.DJANGO_CT,
+ 'DJANGO_ID': constants.DJANGO_ID,
+ })
+
+ def build_template(self, using):
+ t = loader.get_template('search_configuration/solr.xml')
+ c = self.build_context(using=using)
+ return t.render(c)
+
+ def print_stdout(self, schema_xml):
+ sys.stderr.write("\n")
+ sys.stderr.write("\n")
+ sys.stderr.write("\n")
+ sys.stderr.write("Save the following output to 'schema.xml' and place it in your Solr configuration directory.\n")
+ sys.stderr.write("--------------------------------------------------------------------------------------------\n")
+ sys.stderr.write("\n")
+ print(schema_xml)
+
+ def write_file(self, filename, schema_xml):
+ schema_file = open(filename, 'w')
+ schema_file.write(schema_xml)
+ schema_file.close()
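The command can also be driven programmatically; a hedged sketch, assuming a connection named 'default' backed by Solr:

    from django.core.management import call_command

    # Equivalent to: ./manage.py build_solr_schema -u default -f schema.xml
    call_command('build_solr_schema', using='default', filename='schema.xml')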
diff --git a/haystack/management/commands/clear_index.py b/haystack/management/commands/clear_index.py
new file mode 100644
index 0000000..e9803e6
--- /dev/null
+++ b/haystack/management/commands/clear_index.py
@@ -0,0 +1,59 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import sys
+from optparse import make_option
+
+from django.core.management.base import BaseCommand
+from django.utils import six
+
+
+class Command(BaseCommand):
+ help = "Clears out the search index completely."
+ base_options = (
+ make_option('--noinput', action='store_false', dest='interactive', default=True,
+ help='If provided, no prompts will be issued to the user and the data will be wiped out.'
+ ),
+ make_option("-u", "--using", action="append", dest="using",
+ default=[],
+ help='Update only the named backend (can be used multiple times). '
+ 'By default all backends will be updated.'
+ ),
+ make_option('--nocommit', action='store_false', dest='commit',
+ default=True, help='Will pass commit=False to the backend.'
+ ),
+ )
+ option_list = BaseCommand.option_list + base_options
+
+ def handle(self, **options):
+ """Clears out the search index completely."""
+ from haystack import connections
+ self.verbosity = int(options.get('verbosity', 1))
+ self.commit = options.get('commit', True)
+
+ using = options.get('using')
+ if not using:
+ using = connections.connections_info.keys()
+
+ if options.get('interactive', True):
+ print()
+ print("WARNING: This will irreparably remove EVERYTHING from your search index in connection '%s'." % "', '".join(using))
+ print("Your choices after this are to restore from backups or rebuild via the `rebuild_index` command.")
+
+ yes_or_no = six.moves.input("Are you sure you wish to continue? [y/N] ")
+ print()
+
+ if not yes_or_no.lower().startswith('y'):
+ print("No action taken.")
+ sys.exit()
+
+ if self.verbosity >= 1:
+ print("Removing all documents from your index because you said so.")
+
+ for backend_name in using:
+ backend = connections[backend_name].get_backend()
+ backend.clear(commit=self.commit)
+
+ if self.verbosity >= 1:
+ print("All documents removed.")
diff --git a/haystack/management/commands/haystack_info.py b/haystack/management/commands/haystack_info.py
new file mode 100644
index 0000000..9e478a6
--- /dev/null
+++ b/haystack/management/commands/haystack_info.py
@@ -0,0 +1,21 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django.core.management.base import NoArgsCommand
+
+
+class Command(NoArgsCommand):
+ help = "Provides feedback about the current Haystack setup."
+
+ def handle_noargs(self, **options):
+ """Provides feedback about the current Haystack setup."""
+ from haystack import connections
+
+ unified_index = connections['default'].get_unified_index()
+ indexed = unified_index.get_indexed_models()
+ index_count = len(indexed)
+ print("Number of handled %s index(es)." % index_count)
+
+ for index in indexed:
+ print(" - Model: %s by Index: %s" % (index.__name__, unified_index.get_indexes()[index]))
diff --git a/haystack/management/commands/rebuild_index.py b/haystack/management/commands/rebuild_index.py
new file mode 100644
index 0000000..58c1096
--- /dev/null
+++ b/haystack/management/commands/rebuild_index.py
@@ -0,0 +1,26 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django.core.management import call_command
+from django.core.management.base import BaseCommand
+
+from haystack.management.commands.clear_index import Command as ClearCommand
+from haystack.management.commands.update_index import Command as UpdateCommand
+
+__all__ = ['Command']
+
+_combined_options = list(BaseCommand.option_list)
+_combined_options.extend(option for option in UpdateCommand.base_options
+ if option.get_opt_string() not in [i.get_opt_string() for i in _combined_options])
+_combined_options.extend(option for option in ClearCommand.base_options
+ if option.get_opt_string() not in [i.get_opt_string() for i in _combined_options])
+
+
+class Command(BaseCommand):
+ help = "Completely rebuilds the search index by removing the old data and then updating."
+ option_list = _combined_options
+
+ def handle(self, **options):
+ call_command('clear_index', **options)
+ call_command('update_index', **options)
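Since ``rebuild_index`` simply chains ``clear_index`` and ``update_index``, the options of both commands can be passed through; a sketch:

    from django.core.management import call_command

    # Wipe and rebuild without prompting, indexing 500 objects per batch.
    call_command('rebuild_index', interactive=False, batchsize=500)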
diff --git a/haystack/management/commands/update_index.py b/haystack/management/commands/update_index.py
new file mode 100755
index 0000000..81981e5
--- /dev/null
+++ b/haystack/management/commands/update_index.py
@@ -0,0 +1,289 @@
+# encoding: utf-8
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import logging
+import os
+import sys
+import warnings
+from datetime import timedelta
+from optparse import make_option
+
+try:
+ from django.db import close_old_connections
+except ImportError:
+ # This can be removed when we drop support for Django 1.7 and earlier:
+ from django.db import close_connection as close_old_connections
+
+from django.core.management.base import LabelCommand
+from django.db import reset_queries
+
+from haystack import connections as haystack_connections
+from haystack.query import SearchQuerySet
+from haystack.utils.app_loading import haystack_get_models, haystack_load_apps
+
+try:
+ from django.utils.encoding import force_text
+except ImportError:
+ from django.utils.encoding import force_unicode as force_text
+
+try:
+ from django.utils.encoding import smart_bytes
+except ImportError:
+ from django.utils.encoding import smart_str as smart_bytes
+
+try:
+ from django.utils.timezone import now
+except ImportError:
+ from datetime import datetime
+ now = datetime.now
+
+
+DEFAULT_BATCH_SIZE = None
+DEFAULT_AGE = None
+APP = 'app'
+MODEL = 'model'
+
+
+def worker(bits):
+ # We need to reset the connections, otherwise the different processes
+ # will try to share the connection, which causes things to blow up.
+ from django.db import connections
+
+ for alias, info in connections.databases.items():
+ # We need to also tread lightly with SQLite, because blindly wiping
+ # out connections (via ``... = {}``) destroys in-memory DBs.
+ if 'sqlite3' not in info['ENGINE']:
+ try:
+ close_old_connections()
+ if isinstance(connections._connections, dict):
+ del(connections._connections[alias])
+ else:
+ delattr(connections._connections, alias)
+ except KeyError:
+ pass
+
+ if bits[0] == 'do_update':
+ func, model, start, end, total, using, start_date, end_date, verbosity, commit = bits
+ elif bits[0] == 'do_remove':
+ func, model, pks_seen, start, upper_bound, using, verbosity, commit = bits
+ else:
+ return
+
+ unified_index = haystack_connections[using].get_unified_index()
+ index = unified_index.get_index(model)
+ backend = haystack_connections[using].get_backend()
+
+ if func == 'do_update':
+ qs = index.build_queryset(start_date=start_date, end_date=end_date)
+ do_update(backend, index, qs, start, end, total, verbosity=verbosity, commit=commit)
+ else:
+ raise NotImplementedError('Unknown function %s' % func)
+
+
+def do_update(backend, index, qs, start, end, total, verbosity=1, commit=True):
+ # Get a clone of the QuerySet so that the cache doesn't bloat up
+ # in memory. Useful when reindexing large amounts of data.
+ small_cache_qs = qs.all()
+ current_qs = small_cache_qs[start:end]
+
+ if verbosity >= 2:
+ if hasattr(os, 'getppid') and os.getpid() == os.getppid():
+ print(" indexed %s - %d of %d." % (start + 1, end, total))
+ else:
+ print(" indexed %s - %d of %d (by %s)." % (start + 1, end, total, os.getpid()))
+
+ # FIXME: Get the right backend.
+ backend.update(index, current_qs, commit=commit)
+
+ # Clear out the DB connections queries because it bloats up RAM.
+ reset_queries()
+
+
+class Command(LabelCommand):
+ help = "Freshens the index for the given app(s)."
+ base_options = (
+ make_option('-a', '--age', action='store', dest='age',
+ default=DEFAULT_AGE, type='int',
+ help='Number of hours back to consider objects new.'
+ ),
+ make_option('-s', '--start', action='store', dest='start_date',
+ default=None, type='string',
+ help='The start date for indexing within. Can be any dateutil-parsable string, recommended to be YYYY-MM-DDTHH:MM:SS.'
+ ),
+ make_option('-e', '--end', action='store', dest='end_date',
+ default=None, type='string',
+ help='The end date for indexing within. Can be any dateutil-parsable string, recommended to be YYYY-MM-DDTHH:MM:SS.'
+ ),
+ make_option('-b', '--batch-size', action='store', dest='batchsize',
+ default=None, type='int',
+ help='Number of items to index at once.'
+ ),
+ make_option('-r', '--remove', action='store_true', dest='remove',
+ default=False, help='Remove objects from the index that are no longer present in the database.'
+ ),
+ make_option("-u", "--using", action="append", dest="using",
+ default=[],
+ help='Update only the named backend (can be used multiple times). '
+ 'By default all backends will be updated.'
+ ),
+ make_option('-k', '--workers', action='store', dest='workers',
+ default=0, type='int',
+ help='Allows for the use of multiple workers to parallelize indexing. Requires multiprocessing.'
+ ),
+ make_option('--nocommit', action='store_false', dest='commit',
+ default=True, help='Will pass commit=False to the backend.'
+ ),
+ )
+ option_list = LabelCommand.option_list + base_options
+
+ def handle(self, *items, **options):
+ self.verbosity = int(options.get('verbosity', 1))
+ self.batchsize = options.get('batchsize', DEFAULT_BATCH_SIZE)
+ self.start_date = None
+ self.end_date = None
+ self.remove = options.get('remove', False)
+ self.workers = int(options.get('workers', 0))
+ self.commit = options.get('commit', True)
+
+ if sys.version_info < (2, 7):
+ warnings.warn('multiprocessing is disabled on Python 2.6 and earlier. '
+ 'See https://github.com/toastdriven/django-haystack/issues/1001')
+ self.workers = 0
+
+ self.backends = options.get('using')
+ if not self.backends:
+ self.backends = haystack_connections.connections_info.keys()
+
+ age = options.get('age', DEFAULT_AGE)
+ start_date = options.get('start_date')
+ end_date = options.get('end_date')
+
+ if age is not None:
+ self.start_date = now() - timedelta(hours=int(age))
+
+ if start_date is not None:
+ from dateutil.parser import parse as dateutil_parse
+
+ try:
+ self.start_date = dateutil_parse(start_date)
+ except ValueError:
+ pass
+
+ if end_date is not None:
+ from dateutil.parser import parse as dateutil_parse
+
+ try:
+ self.end_date = dateutil_parse(end_date)
+ except ValueError:
+ pass
+
+ if not items:
+ items = haystack_load_apps()
+
+ return super(Command, self).handle(*items, **options)
+
+ def handle_label(self, label, **options):
+ for using in self.backends:
+ try:
+ self.update_backend(label, using)
+ except:
+ logging.exception("Error updating %s using %s ", label, using)
+ raise
+
+ def update_backend(self, label, using):
+ from haystack.exceptions import NotHandled
+
+ backend = haystack_connections[using].get_backend()
+ unified_index = haystack_connections[using].get_unified_index()
+
+ if self.workers > 0:
+ import multiprocessing
+
+ for model in haystack_get_models(label):
+ try:
+ index = unified_index.get_index(model)
+ except NotHandled:
+ if self.verbosity >= 2:
+ print("Skipping '%s' - no index." % model)
+ continue
+
+ if self.workers > 0:
+ # Workers resetting connections can leave references to models / connections
+ # stale, with their connections closed out from under them. Resetting here,
+ # before the loop continues and touches the ORM, avoids that.
+ close_old_connections()
+
+ qs = index.build_queryset(using=using, start_date=self.start_date,
+ end_date=self.end_date)
+
+ total = qs.count()
+
+ if self.verbosity >= 1:
+ print(u"Indexing %d %s" % (total, force_text(model._meta.verbose_name_plural)))
+
+ batch_size = self.batchsize or backend.batch_size
+
+ if self.workers > 0:
+ ghetto_queue = []
+
+ for start in range(0, total, batch_size):
+ end = min(start + batch_size, total)
+
+ if self.workers == 0:
+ do_update(backend, index, qs, start, end, total, verbosity=self.verbosity, commit=self.commit)
+ else:
+ ghetto_queue.append(('do_update', model, start, end, total, using, self.start_date, self.end_date, self.verbosity, self.commit))
+
+ if self.workers > 0:
+ pool = multiprocessing.Pool(self.workers)
+ pool.map(worker, ghetto_queue)
+ pool.close()
+ pool.join()
+
+ if self.remove:
+ if self.start_date or self.end_date or total <= 0:
+ # They're using a reduced set, which may not incorporate
+ # all pks. Rebuild the list with everything.
+ qs = index.index_queryset().values_list('pk', flat=True)
+ database_pks = set(smart_bytes(pk) for pk in qs)
+
+ total = len(database_pks)
+ else:
+ database_pks = set(smart_bytes(pk) for pk in qs.values_list('pk', flat=True))
+
+ # Since records may still be in the search index but not the local database
+ # we'll use that to create batches for processing.
+ # See https://github.com/django-haystack/django-haystack/issues/1186
+ index_total = SearchQuerySet(using=backend.connection_alias).models(model).count()
+
+ # Retrieve PKs from the index. Note that this cannot be a numeric range query because although
+ # pks are normally numeric they can be non-numeric UUIDs or other custom values. To reduce
+ # load on the search engine, we only retrieve the pk field, which will be checked against the
+ # full list obtained from the database, and the id field, which will be used to delete the
+ # record should it be found to be stale.
+ index_pks = SearchQuerySet(using=backend.connection_alias).models(model)
+ index_pks = index_pks.values_list('pk', 'id')
+
+ # We'll collect all of the record IDs which are no longer present in the database and delete
+ # them after walking the entire index. This uses more memory than the incremental approach but
+ # avoids needing the pagination logic below to account for both commit modes:
+ stale_records = set()
+
+ for start in range(0, index_total, batch_size):
+ upper_bound = start + batch_size
+
+ # If the database pk is no longer present, queue the index key for removal:
+ for pk, rec_id in index_pks[start:upper_bound]:
+ if smart_bytes(pk) not in database_pks:
+ stale_records.add(rec_id)
+
+ if stale_records:
+ if self.verbosity >= 1:
+ print(" removing %d stale records." % len(stale_records))
+
+ for rec_id in stale_records:
+ # Since the PK was not in the database list, we'll delete the record from the search index:
+ if self.verbosity >= 2:
+ print(" removing %s." % rec_id)
+
+ backend.remove(rec_id, commit=self.commit)
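A sketch of typical invocations; note that ``--age`` only restricts the QuerySet when the index defines ``get_updated_field``, and the ``--start``/``--end`` options rely on the ``python-dateutil`` package being installed:

    from django.core.management import call_command

    # Periodic freshen: last 24 hours of changes, pruning deleted records.
    call_command('update_index', age=24, remove=True, batchsize=1000)

    # Explicit window (dateutil-parsable strings):
    call_command('update_index', start_date='2015-07-01T00:00:00',
                 end_date='2015-07-07T00:00:00')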
diff --git a/haystack/manager.py b/haystack/manager.py
new file mode 100644
index 0000000..b7588d2
--- /dev/null
+++ b/haystack/manager.py
@@ -0,0 +1,107 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from haystack.query import EmptySearchQuerySet, SearchQuerySet
+
+
+class SearchIndexManager(object):
+ def __init__(self, using=None):
+ super(SearchIndexManager, self).__init__()
+ self.using = using
+
+ def get_search_queryset(self):
+ """Returns a new SearchQuerySet object. Subclasses can override this method
+ to easily customize the behavior of the Manager.
+ """
+ return SearchQuerySet(using=self.using)
+
+ def get_empty_query_set(self):
+ return EmptySearchQuerySet(using=self.using)
+
+ def all(self):
+ return self.get_search_queryset()
+
+ def none(self):
+ return self.get_empty_query_set()
+
+ def filter(self, *args, **kwargs):
+ return self.get_search_queryset().filter(*args, **kwargs)
+
+ def exclude(self, *args, **kwargs):
+ return self.get_search_queryset().exclude(*args, **kwargs)
+
+ def filter_and(self, *args, **kwargs):
+ return self.get_search_queryset().filter_and(*args, **kwargs)
+
+ def filter_or(self, *args, **kwargs):
+ return self.get_search_queryset().filter_or(*args, **kwargs)
+
+ def order_by(self, *args):
+ return self.get_search_queryset().order_by(*args)
+
+ def highlight(self):
+ return self.get_search_queryset().highlight()
+
+ def boost(self, term, boost):
+ return self.get_search_queryset().boost(term, boost)
+
+ def facet(self, field):
+ return self.get_search_queryset().facet(field)
+
+ def within(self, field, point_1, point_2):
+ return self.get_search_queryset().within(field, point_1, point_2)
+
+ def dwithin(self, field, point, distance):
+ return self.get_search_queryset().dwithin(field, point, distance)
+
+ def distance(self, field, point):
+ return self.get_search_queryset().distance(field, point)
+
+ def date_facet(self, field, start_date, end_date, gap_by, gap_amount=1):
+ return self.get_search_queryset().date_facet(field, start_date, end_date, gap_by, gap_amount=gap_amount)
+
+ def query_facet(self, field, query):
+ return self.get_search_queryset().query_facet(field, query)
+
+ def narrow(self, query):
+ return self.get_search_queryset().narrow(query)
+
+ def raw_search(self, query_string, **kwargs):
+ return self.get_search_queryset().raw_search(query_string, **kwargs)
+
+ def load_all(self):
+ return self.get_search_queryset().load_all()
+
+ def auto_query(self, query_string, fieldname='content'):
+ return self.get_search_queryset().auto_query(query_string, fieldname=fieldname)
+
+ def autocomplete(self, **kwargs):
+ return self.get_search_queryset().autocomplete(**kwargs)
+
+ def using(self, connection_name):
+ return self.get_search_queryset().using(connection_name)
+
+ def count(self):
+ return self.get_search_queryset().count()
+
+ def best_match(self):
+ return self.get_search_queryset().best_match()
+
+ def latest(self, date_field):
+ return self.get_search_queryset().latest(date_field)
+
+ def more_like_this(self, model_instance):
+ return self.get_search_queryset().more_like_this(model_instance)
+
+ def facet_counts(self):
+ return self.get_search_queryset().facet_counts()
+
+ def spelling_suggestion(self, preferred_query=None):
+ return self.get_search_queryset().spelling_suggestion(preferred_query=preferred_query)
+
+ def values(self, *fields):
+ return self.get_search_queryset().values(*fields)
+
+ def values_list(self, *fields, **kwargs):
+ return self.get_search_queryset().values_list(*fields, **kwargs)
diff --git a/haystack/models.py b/haystack/models.py
new file mode 100644
index 0000000..a121207
--- /dev/null
+++ b/haystack/models.py
@@ -0,0 +1,247 @@
+# encoding: utf-8
+
+# "Hey, Django! Look at me, I'm an app! For Serious!"
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django.conf import settings
+from django.core.exceptions import ObjectDoesNotExist
+from django.db import models
+from django.utils import six
+from django.utils.text import capfirst
+
+from haystack.exceptions import NotHandled, SpatialError
+from haystack.utils import log as logging
+
+try:
+ from django.utils.encoding import force_text
+except ImportError:
+ from django.utils.encoding import force_unicode as force_text
+
+try:
+ from geopy import distance as geopy_distance
+except ImportError:
+ geopy_distance = None
+
+
+# Not a Django model, but tightly tied to them and there doesn't seem to be a
+# better spot in the tree.
+class SearchResult(object):
+ """
+ A single search result. The actual object is loaded lazily by accessing
+ object; until then this object only stores the model, pk, and score.
+
+ Note that iterating over SearchResults and getting the object for each
+ result will do O(N) database queries, which may not fit your needs for
+ performance.
+ """
+ def __init__(self, app_label, model_name, pk, score, **kwargs):
+ self.app_label, self.model_name = app_label, model_name
+ self.pk = pk
+ self.score = score
+ self._object = None
+ self._model = None
+ self._verbose_name = None
+ self._additional_fields = []
+ self._point_of_origin = kwargs.pop('_point_of_origin', None)
+ self._distance = kwargs.pop('_distance', None)
+ self._stored_fields = None
+ self.log = self._get_log()
+
+ for key, value in kwargs.items():
+ if not key in self.__dict__:
+ self.__dict__[key] = value
+ self._additional_fields.append(key)
+
+ def _get_log(self):
+ return logging.getLogger('haystack')
+
+ def __repr__(self):
+ return "<SearchResult: %s.%s (pk=%r)>" % (self.app_label, self.model_name, self.pk)
+
+ def __unicode__(self):
+ return force_text(self.__repr__())
+
+ def __getattr__(self, attr):
+ if attr == '__getnewargs__':
+ raise AttributeError
+
+ return self.__dict__.get(attr, None)
+
+ def _get_searchindex(self):
+ from haystack import connections
+ return connections['default'].get_unified_index().get_index(self.model)
+
+ searchindex = property(_get_searchindex)
+
+ def _get_object(self):
+ if self._object is None:
+ if self.model is None:
+ self.log.error("Model could not be found for SearchResult '%s'.", self)
+ return None
+
+ try:
+ try:
+ self._object = self.searchindex.read_queryset().get(pk=self.pk)
+ except NotHandled:
+ self.log.warning("Model '%s.%s' not handled by the routers.", self.app_label, self.model_name)
+ # Revert to old behaviour
+ self._object = self.model._default_manager.get(pk=self.pk)
+ except ObjectDoesNotExist:
+ self.log.error("Object could not be found in database for SearchResult '%s'.", self)
+ self._object = None
+
+ return self._object
+
+ def _set_object(self, obj):
+ self._object = obj
+
+ object = property(_get_object, _set_object)
+
+ def _get_model(self):
+ if self._model is None:
+ try:
+ self._model = models.get_model(self.app_label, self.model_name)
+ except LookupError:
+ # This changed in Django 1.7 to raise an error instead of
+ # returning None when the model isn't found, so catch the
+ # LookupError and leave self._model as None.
+ pass
+
+ return self._model
+
+ def _set_model(self, obj):
+ self._model = obj
+
+ model = property(_get_model, _set_model)
+
+ def _get_distance(self):
+ from haystack.utils.geo import Distance
+
+ if self._distance is None:
+ # We didn't get it from the backend & we haven't tried calculating
+ # it yet. Check if geopy is available to do it the "slow" way
+ # (even though slow meant 100 distance calculations in 0.004 seconds
+ # in my testing).
+ if geopy_distance is None:
+ raise SpatialError("The backend doesn't have 'DISTANCE_AVAILABLE' enabled & the 'geopy' library could not be imported, so distance information is not available.")
+
+ if not self._point_of_origin:
+ raise SpatialError("The original point is not available.")
+
+ if not hasattr(self, self._point_of_origin['field']):
+ raise SpatialError("The field '%s' was not included in search results, so the distance could not be calculated." % self._point_of_origin['field'])
+
+ po_lng, po_lat = self._point_of_origin['point'].get_coords()
+ location_field = getattr(self, self._point_of_origin['field'])
+
+ if location_field is None:
+ return None
+
+ lf_lng, lf_lat = location_field.get_coords()
+ self._distance = Distance(km=geopy_distance.distance((po_lat, po_lng), (lf_lat, lf_lng)).km)
+
+ # We've either already calculated it or the backend returned it, so
+ # let's use that.
+ return self._distance
+
+ def _set_distance(self, dist):
+ self._distance = dist
+
+ distance = property(_get_distance, _set_distance)
+
+ def _get_verbose_name(self):
+ if self.model is None:
+ self.log.error("Model could not be found for SearchResult '%s'.", self)
+ return u''
+
+ return force_text(capfirst(self.model._meta.verbose_name))
+
+ verbose_name = property(_get_verbose_name)
+
+ def _get_verbose_name_plural(self):
+ if self.model is None:
+ self.log.error("Model could not be found for SearchResult '%s'.", self)
+ return u''
+
+ return force_text(capfirst(self.model._meta.verbose_name_plural))
+
+ verbose_name_plural = property(_get_verbose_name_plural)
+
+ def content_type(self):
+ """Returns the content type for the result's model instance."""
+ if self.model is None:
+ self.log.error("Model could not be found for SearchResult '%s'.", self)
+ return u''
+
+ return six.text_type(self.model._meta)
+
+ def get_additional_fields(self):
+ """
+ Returns a dictionary of all of the fields from the raw result.
+
+ Useful for serializing results. Only returns what was seen from the
+ search engine, so it may have extra fields Haystack's indexes aren't
+ aware of.
+ """
+ additional_fields = {}
+
+ for fieldname in self._additional_fields:
+ additional_fields[fieldname] = getattr(self, fieldname)
+
+ return additional_fields
+
+ def get_stored_fields(self):
+ """
+ Returns a dictionary of all of the stored fields from the SearchIndex.
+
+ Useful for serializing results. Only returns the fields Haystack's
+ indexes are aware of as being 'stored'.
+ """
+ if self._stored_fields is None:
+ from haystack import connections
+ from haystack.exceptions import NotHandled
+
+ try:
+ index = connections['default'].get_unified_index().get_index(self.model)
+ except NotHandled:
+ # Not found? Return nothing.
+ return {}
+
+ self._stored_fields = {}
+
+ # Iterate through the index's fields, pulling out the fields that
+ # are stored.
+ for fieldname, field in index.fields.items():
+ if field.stored is True:
+ self._stored_fields[fieldname] = getattr(self, fieldname, u'')
+
+ return self._stored_fields
+
+ def __getstate__(self):
+ """
+ Returns a dictionary representing the ``SearchResult`` in order to
+ make it pickleable.
+ """
+ # The ``log`` is excluded because, under the hood, ``logging`` uses
+ # ``threading.Lock``, which doesn't pickle well.
+ ret_dict = self.__dict__.copy()
+ del(ret_dict['log'])
+ return ret_dict
+
+ def __setstate__(self, data_dict):
+ """
+ Updates the object's attributes according to data passed by pickle.
+ """
+ self.__dict__.update(data_dict)
+ self.log = self._get_log()
+
+
+def reload_indexes(sender, *args, **kwargs):
+ from haystack import connections
+
+ for conn in connections.all():
+ ui = conn.get_unified_index()
+ # Note: Unlike above, we're resetting the ``UnifiedIndex`` here.
+ # This gives us a clean slate.
+ ui.reset()
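A sketch of consuming ``SearchResult`` objects; keep in mind that accessing ``result.object`` lazily performs one database query per result:

    from haystack.query import SearchQuerySet

    for result in SearchQuerySet().filter(content='django')[:5]:
        print(result.app_label, result.model_name, result.pk, result.score)
        print(result.verbose_name)         # capitalized model verbose_name
        print(result.get_stored_fields())  # only fields the index marks as stored
        obj = result.object                # database hit; None if the row is gone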
diff --git a/haystack/panels.py b/haystack/panels.py
new file mode 100644
index 0000000..058c7ad
--- /dev/null
+++ b/haystack/panels.py
@@ -0,0 +1,86 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import datetime
+
+from debug_toolbar.panels import DebugPanel
+from django.template.loader import render_to_string
+from django.utils import six
+from django.utils.translation import ugettext_lazy as _
+
+from haystack import connections
+
+
+class HaystackDebugPanel(DebugPanel):
+ """
+ Panel that displays information about the Haystack queries run while
+ processing the request.
+ """
+ name = 'Haystack'
+ has_content = True
+
+ def __init__(self, *args, **kwargs):
+ super(self.__class__, self).__init__(*args, **kwargs)
+ self._offset = dict((alias, len(connections[alias].queries)) for alias in connections.connections_info.keys())
+ self._search_time = 0
+ self._queries = []
+ self._backends = {}
+
+ def nav_title(self):
+ return _('Haystack')
+
+ def nav_subtitle(self):
+ self._queries = []
+ self._backends = {}
+
+ for alias in connections.connections_info.keys():
+ search_queries = connections[alias].queries[self._offset[alias]:]
+ self._backends[alias] = {
+ 'time_spent': sum(float(q['time']) for q in search_queries),
+ 'queries': len(search_queries),
+ }
+ self._queries.extend([(alias, q) for q in search_queries])
+
+ self._queries.sort(key=lambda x: x[1]['start'])
+ self._search_time = sum([d['time_spent'] for d in self._backends.values()])
+ num_queries = len(self._queries)
+ return "%d %s in %.2fms" % (
+ num_queries,
+ (num_queries == 1) and 'query' or 'queries',
+ self._search_time
+ )
+
+ def title(self):
+ return _('Search Queries')
+
+ def url(self):
+ return ''
+
+ def content(self):
+ width_ratio_tally = 0
+
+ for alias, query in self._queries:
+ query['alias'] = alias
+ query['query'] = query['query_string']
+
+ if query.get('additional_kwargs'):
+ if query['additional_kwargs'].get('result_class'):
+ query['additional_kwargs']['result_class'] = six.text_type(query['additional_kwargs']['result_class'])
+
+ try:
+ query['width_ratio'] = (float(query['time']) / self._search_time) * 100
+ except ZeroDivisionError:
+ query['width_ratio'] = 0
+
+ query['start_offset'] = width_ratio_tally
+ width_ratio_tally += query['width_ratio']
+
+ context = self.context.copy()
+ context.update({
+ 'backends': sorted(self._backends.items(), key=lambda x: -x[1]['time_spent']),
+ 'queries': [q for a, q in self._queries],
+ 'sql_time': self._search_time,
+ })
+
+ return render_to_string('panels/haystack.html', context)
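Enabling the panel is a matter of listing it in django-debug-toolbar's panel setting; a sketch, assuming a toolbar version that still uses the ``DEBUG_TOOLBAR_PANELS`` tuple of dotted paths:

    # settings.py (illustrative)
    DEBUG_TOOLBAR_PANELS = (
        # ... the toolbar's default panels ...
        'haystack.panels.HaystackDebugPanel',
    )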
diff --git a/haystack/query.py b/haystack/query.py
new file mode 100644
index 0000000..096a4b3
--- /dev/null
+++ b/haystack/query.py
@@ -0,0 +1,841 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import operator
+import warnings
+
+from django.utils import six
+
+from haystack import connection_router, connections
+from haystack.backends import SQ
+from haystack.constants import DEFAULT_OPERATOR, ITERATOR_LOAD_PER_QUERY, REPR_OUTPUT_SIZE
+from haystack.exceptions import NotHandled
+from haystack.inputs import AutoQuery, Clean, Raw
+from haystack.utils import log as logging
+
+
+class SearchQuerySet(object):
+ """
+ Provides a way to specify search parameters and lazily load results.
+
+ Supports chaining (a la QuerySet) to narrow the search.
+ """
+ def __init__(self, using=None, query=None):
+ # ``_using`` should only ever be a value other than ``None`` if it's
+ # been forced with the ``.using`` method.
+ self._using = using
+ self.query = None
+ self._determine_backend()
+
+ # If ``query`` is present, it should override even what the routers
+ # think.
+ if query is not None:
+ self.query = query
+
+ self._result_cache = []
+ self._result_count = None
+ self._cache_full = False
+ self._load_all = False
+ self._ignored_result_count = 0
+ self.log = logging.getLogger('haystack')
+
+ def _determine_backend(self):
+ from haystack import connections
+ # A backend has been manually selected. Use it instead.
+ if self._using is not None:
+ self.query = connections[self._using].get_query()
+ return
+
+ # No backend, so rely on the routers to figure out what's right.
+ hints = {}
+
+ if self.query:
+ hints['models'] = self.query.models
+
+ backend_alias = connection_router.for_read(**hints)
+
+ if isinstance(backend_alias, (list, tuple)) and len(backend_alias):
+ # We can only effectively read from one engine.
+ backend_alias = backend_alias[0]
+
+ # The ``SearchQuery`` might swap itself out for a different variant
+ # here.
+ if self.query:
+ self.query = self.query.using(backend_alias)
+ else:
+ self.query = connections[backend_alias].get_query()
+
+ def __getstate__(self):
+ """
+ For pickling.
+ """
+ len(self)
+ obj_dict = self.__dict__.copy()
+ obj_dict['_iter'] = None
+ obj_dict['log'] = None
+ return obj_dict
+
+ def __setstate__(self, data_dict):
+ """
+ For unpickling.
+ """
+ self.__dict__ = data_dict
+ self.log = logging.getLogger('haystack')
+
+ def __repr__(self):
+ data = list(self[:REPR_OUTPUT_SIZE])
+
+ if len(self) > REPR_OUTPUT_SIZE:
+ data[-1] = "...(remaining elements truncated)..."
+
+ return repr(data)
+
+ def __len__(self):
+ if not self._result_count:
+ self._result_count = self.query.get_count()
+
+ # Some backends give weird, false-y values here. Convert to zero.
+ if not self._result_count:
+ self._result_count = 0
+
+ # This needs to return the actual number of hits, not what's in the cache.
+ return self._result_count - self._ignored_result_count
+
+ def __iter__(self):
+ if self._cache_is_full():
+ # We've got a fully populated cache. Let Python do the hard work.
+ return iter(self._result_cache)
+
+ return self._manual_iter()
+
+ def __and__(self, other):
+ if isinstance(other, EmptySearchQuerySet):
+ return other._clone()
+ combined = self._clone()
+ combined.query.combine(other.query, SQ.AND)
+ return combined
+
+ def __or__(self, other):
+ combined = self._clone()
+ if isinstance(other, EmptySearchQuerySet):
+ return combined
+ combined.query.combine(other.query, SQ.OR)
+ return combined
+
+ def _cache_is_full(self):
+ if not self.query.has_run():
+ return False
+
+ if len(self) <= 0:
+ return True
+
+ try:
+ self._result_cache.index(None)
+ return False
+ except ValueError:
+ # No ``None``s found in the results. Check the length of the cache.
+ return len(self._result_cache) > 0
+
+ def _manual_iter(self):
+ # If we're here, our cache isn't fully populated.
+ # For efficiency, fill the cache as we go if we run out of results.
+ # Also, this can't be part of the __iter__ method due to Python's rules
+ # about generator functions.
+ current_position = 0
+ current_cache_max = 0
+
+ while True:
+ if len(self._result_cache) > 0:
+ try:
+ current_cache_max = self._result_cache.index(None)
+ except ValueError:
+ current_cache_max = len(self._result_cache)
+
+ while current_position < current_cache_max:
+ yield self._result_cache[current_position]
+ current_position += 1
+
+ if self._cache_is_full():
+ raise StopIteration
+
+ # We've run out of results and haven't hit our limit.
+ # Fill more of the cache.
+ if not self._fill_cache(current_position, current_position + ITERATOR_LOAD_PER_QUERY):
+ raise StopIteration
+
+ def _fill_cache(self, start, end, **kwargs):
+ # Tell the query where to start from and how many we'd like.
+ self.query._reset()
+ self.query.set_limits(start, end)
+ results = self.query.get_results(**kwargs)
+
+ if results is None or len(results) == 0:
+ return False
+
+ # Setup the full cache now that we know how many results there are.
+ # We need the ``None``s as placeholders to know what parts of the
+ # cache we have/haven't filled.
+ # Using ``None`` like this takes up very little memory. In testing,
+ # an array of 100,000 ``None``s consumed less than .5 Mb, which ought
+ # to be an acceptable loss for consistent and more efficient caching.
+ if len(self._result_cache) == 0:
+ self._result_cache = [None for i in range(self.query.get_count())]
+
+ if start is None:
+ start = 0
+
+ if end is None:
+ end = self.query.get_count()
+
+ to_cache = self.post_process_results(results)
+
+ # Assign by slice.
+ self._result_cache[start:start + len(to_cache)] = to_cache
+ return True
+
+ def post_process_results(self, results):
+ to_cache = []
+
+ # Check if we wish to load all objects.
+ if self._load_all:
+ models_pks = {}
+ loaded_objects = {}
+
+ # Remember the search position for each result so we don't have to re-sort later.
+ for result in results:
+ models_pks.setdefault(result.model, []).append(result.pk)
+
+ # Load the objects for each model in turn.
+ for model in models_pks:
+ try:
+ ui = connections[self.query._using].get_unified_index()
+ index = ui.get_index(model)
+ objects = index.read_queryset(using=self.query._using)
+ loaded_objects[model] = objects.in_bulk(models_pks[model])
+ except NotHandled:
+ self.log.warning("Model '%s' not handled by the routers", model)
+ # Revert to old behaviour
+ loaded_objects[model] = model._default_manager.in_bulk(models_pks[model])
+
+ for result in results:
+ if self._load_all:
+ # We have to deal with integer keys being cast from strings
+ model_objects = loaded_objects.get(result.model, {})
+ if not result.pk in model_objects:
+ try:
+ result.pk = int(result.pk)
+ except ValueError:
+ pass
+ try:
+ result._object = model_objects[result.pk]
+ except KeyError:
+ # The object was either deleted since we indexed or should
+ # be ignored; fail silently.
+ self._ignored_result_count += 1
+ continue
+
+ to_cache.append(result)
+
+ return to_cache
+
+ def __getitem__(self, k):
+ """
+ Retrieves an item or slice from the set of results.
+ """
+ if not isinstance(k, (slice, six.integer_types)):
+ raise TypeError
+ assert ((not isinstance(k, slice) and (k >= 0))
+ or (isinstance(k, slice) and (k.start is None or k.start >= 0)
+ and (k.stop is None or k.stop >= 0))), \
+ "Negative indexing is not supported."
+
+ # Remember if it's a slice or not. We're going to treat everything as
+ # a slice to simplify the logic and will `.pop()` at the end as needed.
+ if isinstance(k, slice):
+ is_slice = True
+ start = k.start
+
+ if k.stop is not None:
+ bound = int(k.stop)
+ else:
+ bound = None
+ else:
+ is_slice = False
+ start = k
+ bound = k + 1
+
+ # We need to check whether we need to populate more of the cache.
+ if len(self._result_cache) <= 0 or (None in self._result_cache[start:bound] and not self._cache_is_full()):
+ try:
+ self._fill_cache(start, bound)
+ except StopIteration:
+ # There's nothing left, even though the bound is higher.
+ pass
+
+ # Cache should be full enough for our needs.
+ if is_slice:
+ return self._result_cache[start:bound]
+ else:
+ return self._result_cache[start]
+
+ # Methods that return a SearchQuerySet.
+ def all(self):
+ """Returns all results for the query."""
+ return self._clone()
+
+ def none(self):
+ """Returns an empty result list for the query."""
+ return self._clone(klass=EmptySearchQuerySet)
+
+ def filter(self, *args, **kwargs):
+ """Narrows the search based on certain attributes and the default operator."""
+ if DEFAULT_OPERATOR == 'OR':
+ return self.filter_or(*args, **kwargs)
+ else:
+ return self.filter_and(*args, **kwargs)
+
+ def exclude(self, *args, **kwargs):
+ """Narrows the search by ensuring certain attributes are not included."""
+ clone = self._clone()
+ clone.query.add_filter(~SQ(*args, **kwargs))
+ return clone
+
+ def filter_and(self, *args, **kwargs):
+ """Narrows the search by looking for (and including) certain attributes."""
+ clone = self._clone()
+ clone.query.add_filter(SQ(*args, **kwargs))
+ return clone
+
+ def filter_or(self, *args, **kwargs):
+ """Narrows the search by ensuring certain attributes are not included."""
+ clone = self._clone()
+ clone.query.add_filter(SQ(*args, **kwargs), use_or=True)
+ return clone
+
+ def order_by(self, *args):
+ """Alters the order in which the results should appear."""
+ clone = self._clone()
+
+ for field in args:
+ clone.query.add_order_by(field)
+
+ return clone
+
+ def highlight(self):
+ """Adds highlighting to the results."""
+ clone = self._clone()
+ clone.query.add_highlight()
+ return clone
+
+ def models(self, *models):
+ """Accepts an arbitrary number of Model classes to include in the search."""
+ clone = self._clone()
+
+ for model in models:
+ if not model in connections[self.query._using].get_unified_index().get_indexed_models():
+ warnings.warn('The model %r is not registered for search.' % (model,))
+
+ clone.query.add_model(model)
+
+ return clone
+
+ def result_class(self, klass):
+ """
+ Allows specifying a different class to use for results.
+
+ Overrides any previous usages. If ``None`` is provided, Haystack will
+ revert back to the default ``SearchResult`` object.
+ """
+ clone = self._clone()
+ clone.query.set_result_class(klass)
+ return clone
+
+ def boost(self, term, boost):
+ """Boosts a certain aspect of the query."""
+ clone = self._clone()
+ clone.query.add_boost(term, boost)
+ return clone
+
+ def facet(self, field, **options):
+ """Adds faceting to a query for the provided field."""
+ clone = self._clone()
+ clone.query.add_field_facet(field, **options)
+ return clone
+
+ def within(self, field, point_1, point_2):
+ """Spatial: Adds a bounding box search to the query."""
+ clone = self._clone()
+ clone.query.add_within(field, point_1, point_2)
+ return clone
+
+ def dwithin(self, field, point, distance):
+ """Spatial: Adds a distance-based search to the query."""
+ clone = self._clone()
+ clone.query.add_dwithin(field, point, distance)
+ return clone
+
+ def stats(self, field):
+ """Adds stats to a query for the provided field."""
+ return self.stats_facet(field, facet_fields=None)
+
+ def stats_facet(self, field, facet_fields=None):
+ """Adds stats facet for the given field and facet_fields represents
+ the faceted fields."""
+ clone = self._clone()
+ stats_facets = []
+ try:
+ stats_facets.append(sum(facet_fields, []))
+ except TypeError:
+ if facet_fields:
+ stats_facets.append(facet_fields)
+ clone.query.add_stats_query(field, stats_facets)
+ return clone
+
+ def distance(self, field, point):
+ """
+ Spatial: Denotes results must have distance measurements from the
+ provided point.
+ """
+ clone = self._clone()
+ clone.query.add_distance(field, point)
+ return clone
+
+ def date_facet(self, field, start_date, end_date, gap_by, gap_amount=1):
+ """Adds faceting to a query for the provided field by date."""
+ clone = self._clone()
+ clone.query.add_date_facet(field, start_date, end_date, gap_by, gap_amount=gap_amount)
+ return clone
+
+ def query_facet(self, field, query):
+ """Adds faceting to a query for the provided field with a custom query."""
+ clone = self._clone()
+ clone.query.add_query_facet(field, query)
+ return clone
+
+ def narrow(self, query):
+ """Pushes existing facet choices into the search."""
+
+ if isinstance(query, SQ):
+ # produce query string using empty query of the same class
+ empty_query = self.query._clone()
+ empty_query._reset()
+ query = query.as_query_string(empty_query.build_query_fragment)
+
+ clone = self._clone()
+ clone.query.add_narrow_query(query)
+ return clone
+
+ def raw_search(self, query_string, **kwargs):
+ """Passes a raw query directly to the backend."""
+ return self.filter(content=Raw(query_string, **kwargs))
+
+ def load_all(self):
+ """Efficiently populates the objects in the search results."""
+ clone = self._clone()
+ clone._load_all = True
+ return clone
+
+ def auto_query(self, query_string, fieldname='content'):
+ """
+ Performs a best guess constructing the search query.
+
+ This method is somewhat naive but works well enough for the simple,
+ common cases.
+ """
+ kwargs = {
+ fieldname: AutoQuery(query_string)
+ }
+ return self.filter(**kwargs)
+
+ def autocomplete(self, **kwargs):
+ """
+ A shortcut method to perform an autocomplete search.
+
+ Must be run against fields that are either ``NgramField`` or
+ ``EdgeNgramField``.
+ """
+ clone = self._clone()
+ query_bits = []
+
+ for field_name, query in kwargs.items():
+ for word in query.split(' '):
+ bit = clone.query.clean(word.strip())
+ if bit:
+ query_bits.append(SQ(**{field_name: bit}))
+
+ return clone.filter(six.moves.reduce(operator.__and__, query_bits))
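+
+ # Illustrative usage sketch (the 'content_auto' field name is assumed and
+ # must be an ``NgramField`` or ``EdgeNgramField`` on the index):
+ #
+ #     sqs = SearchQuerySet().autocomplete(content_auto='old tow')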
+
+ def using(self, connection_name):
+ """
+ Allows switching which connection the ``SearchQuerySet`` uses to
+ search in.
+ """
+ clone = self._clone()
+ clone.query = self.query.using(connection_name)
+ clone._using = connection_name
+ return clone
+
+ # Methods that do not return a SearchQuerySet.
+
+ def count(self):
+ """Returns the total number of matching results."""
+ return len(self)
+
+ def best_match(self):
+ """Returns the best/top search result that matches the query."""
+ return self[0]
+
+ def latest(self, date_field):
+ """Returns the most recent search result that matches the query."""
+ clone = self._clone()
+ clone.query.clear_order_by()
+ clone.query.add_order_by("-%s" % date_field)
+ return clone.best_match()
+
+ def more_like_this(self, model_instance):
+ """Finds similar results to the object passed in."""
+ clone = self._clone()
+ clone.query.more_like_this(model_instance)
+ return clone
+
+ def facet_counts(self):
+ """
+ Returns the facet counts found by the query.
+
+ This will cause the query to execute and should generally be used when
+ presenting the data.
+ """
+ if self.query.has_run():
+ return self.query.get_facet_counts()
+ else:
+ clone = self._clone()
+ return clone.query.get_facet_counts()
+
+ def stats_results(self):
+ """
+ Returns the stats results found by the query.
+ """
+ if self.query.has_run():
+ return self.query.get_stats()
+ else:
+ clone = self._clone()
+ return clone.query.get_stats()
+
+ def spelling_suggestion(self, preferred_query=None):
+ """
+ Returns the spelling suggestion found by the query.
+
+ To work, you must set ``INCLUDE_SPELLING`` within your connection's
+ settings dictionary to ``True``. Otherwise, ``None`` will be returned.
+
+ This will cause the query to execute and should generally be used when
+ presenting the data.
+ """
+ if self.query.has_run():
+ return self.query.get_spelling_suggestion(preferred_query)
+ else:
+ clone = self._clone()
+ return clone.query.get_spelling_suggestion(preferred_query)
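+
+ # Illustrative usage sketch (assumes 'INCLUDE_SPELLING': True in the
+ # connection settings and a backend that supports suggestions):
+ #
+ #     SearchQuerySet().auto_query('mor exmples').spelling_suggestion()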
+
+ def values(self, *fields):
+ """
+ Returns a list of dictionaries, each containing the key/value pairs for
+ the result, exactly like Django's ``ValuesQuerySet``.
+ """
+ qs = self._clone(klass=ValuesSearchQuerySet)
+ qs._fields.extend(fields)
+ return qs
+
+ def values_list(self, *fields, **kwargs):
+ """
+ Returns a list of field values as tuples, exactly like Django's
+ ``QuerySet.values_list``.
+
+ Optionally accepts a ``flat=True`` kwarg which, when a single field is
+ provided, returns a flat list of that field's values rather than a list
+ of one-tuples.
+ """
+ flat = kwargs.pop("flat", False)
+
+ if flat and len(fields) > 1:
+ raise TypeError("'flat' is not valid when values_list is called with more than one field.")
+
+ qs = self._clone(klass=ValuesListSearchQuerySet)
+ qs._fields.extend(fields)
+ qs._flat = flat
+ return qs
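+
+ # Illustrative usage sketch (field names assumed):
+ #
+ #     SearchQuerySet().values('title', 'author')         # list of dicts
+ #     SearchQuerySet().values_list('title', flat=True)   # flat list of titles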
+
+ # Utility methods.
+
+ def _clone(self, klass=None):
+ if klass is None:
+ klass = self.__class__
+
+ query = self.query._clone()
+ clone = klass(query=query)
+ clone._load_all = self._load_all
+ return clone
+
+
+class EmptySearchQuerySet(SearchQuerySet):
+ """
+ A stubbed SearchQuerySet that behaves as normal but always returns no
+ results.
+ """
+ def __len__(self):
+ return 0
+
+ def _cache_is_full(self):
+ # Pretend the cache is always full with no results.
+ return True
+
+ def _clone(self, klass=None):
+ clone = super(EmptySearchQuerySet, self)._clone(klass=klass)
+ clone._result_cache = []
+ return clone
+
+ def _fill_cache(self, start, end):
+ return False
+
+ def facet_counts(self):
+ return {}
+
+
+class ValuesListSearchQuerySet(SearchQuerySet):
+ """
+ A ``SearchQuerySet`` which returns a list of field values as tuples, exactly
+ like Django's ``ValuesListQuerySet``.
+ """
+ def __init__(self, *args, **kwargs):
+ super(ValuesListSearchQuerySet, self).__init__(*args, **kwargs)
+ self._flat = False
+ self._fields = []
+
+ # Removing this dependency would require refactoring much of the backend
+ # code (_process_results, etc.) and these aren't large enough to make it
+ # an immediate priority:
+ self._internal_fields = ['id', 'django_ct', 'django_id', 'score']
+
+ def _clone(self, klass=None):
+ clone = super(ValuesListSearchQuerySet, self)._clone(klass=klass)
+ clone._fields = self._fields
+ clone._flat = self._flat
+ return clone
+
+ def _fill_cache(self, start, end):
+ query_fields = set(self._internal_fields)
+ query_fields.update(self._fields)
+ kwargs = {
+ 'fields': query_fields
+ }
+ return super(ValuesListSearchQuerySet, self)._fill_cache(start, end, **kwargs)
+
+ def post_process_results(self, results):
+ to_cache = []
+
+ if self._flat:
+ accum = to_cache.extend
+ else:
+ accum = to_cache.append
+
+ for result in results:
+ accum([getattr(result, i, None) for i in self._fields])
+
+ return to_cache
+
+
+class ValuesSearchQuerySet(ValuesListSearchQuerySet):
+ """
+ A ``SearchQuerySet`` which returns a list of dictionaries, each containing
+ the key/value pairs for the result, exactly like Django's
+ ``ValuesQuerySet``.
+ """
+ def _fill_cache(self, start, end):
+ query_fields = set(self._internal_fields)
+ query_fields.update(self._fields)
+ kwargs = {
+ 'fields': query_fields
+ }
+ return super(ValuesListSearchQuerySet, self)._fill_cache(start, end, **kwargs)
+
+ def post_process_results(self, results):
+ to_cache = []
+
+ for result in results:
+ to_cache.append(dict((i, getattr(result, i, None)) for i in self._fields))
+
+ return to_cache
+
+
+class RelatedSearchQuerySet(SearchQuerySet):
+ """
+ A variant of the ``SearchQuerySet`` that can handle ``load_all_queryset``
+ overrides.
+
+ It differs mainly in the ``_fill_cache`` method, which is far less
+ efficient but needs to fill the cache before iterating over it in order
+ to maintain consistency.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super(RelatedSearchQuerySet, self).__init__(*args, **kwargs)
+ self._load_all_querysets = {}
+ self._result_cache = []
+
+ def _cache_is_full(self):
+ return len(self._result_cache) >= len(self)
+
+ def _manual_iter(self):
+ # If we're here, our cache isn't fully populated.
+ # For efficiency, fill the cache as we go if we run out of results.
+ # Also, this can't be part of the __iter__ method due to Python's rules
+ # about generator functions.
+ current_position = 0
+ current_cache_max = 0
+
+ while True:
+ current_cache_max = len(self._result_cache)
+
+ while current_position < current_cache_max:
+ yield self._result_cache[current_position]
+ current_position += 1
+
+ if self._cache_is_full():
+ raise StopIteration
+
+ # We've run out of results and haven't hit our limit.
+ # Fill more of the cache.
+ start = current_position + self._ignored_result_count
+
+ if not self._fill_cache(start, start + ITERATOR_LOAD_PER_QUERY):
+ raise StopIteration
+
+ def _fill_cache(self, start, end):
+ # Tell the query where to start from and how many we'd like.
+ self.query._reset()
+ self.query.set_limits(start, end)
+ results = self.query.get_results()
+
+ if len(results) == 0:
+ return False
+
+ if start is None:
+ start = 0
+
+ if end is None:
+ end = self.query.get_count()
+
+ # Check if we wish to load all objects.
+ if self._load_all:
+ models_pks = {}
+ loaded_objects = {}
+
+ # Remember the search position for each result so we don't have to re-sort later.
+ for result in results:
+ models_pks.setdefault(result.model, []).append(result.pk)
+
+ # Load the objects for each model in turn.
+ for model in models_pks:
+ if model in self._load_all_querysets:
+ # Use the overriding queryset.
+ loaded_objects[model] = self._load_all_querysets[model].in_bulk(models_pks[model])
+ else:
+ # Check the SearchIndex for the model for an override.
+ try:
+ index = connections[self.query._using].get_unified_index().get_index(model)
+ qs = index.load_all_queryset()
+ loaded_objects[model] = qs.in_bulk(models_pks[model])
+ except NotHandled:
+ # The model returned doesn't seem to be handled by the
+ # routers. We should silently fail and populate
+ # nothing for those objects.
+ loaded_objects[model] = []
+
+ if len(results) + len(self._result_cache) < len(self) and len(results) < ITERATOR_LOAD_PER_QUERY:
+ self._ignored_result_count += ITERATOR_LOAD_PER_QUERY - len(results)
+
+ for result in results:
+ if self._load_all:
+ # We have to deal with integer keys being cast from strings; if this
+ # fails we've got a character pk.
+ try:
+ result.pk = int(result.pk)
+ except ValueError:
+ pass
+ try:
+ result._object = loaded_objects[result.model][result.pk]
+ except (KeyError, IndexError):
+ # The object was either deleted since we indexed or should
+ # be ignored; fail silently.
+ self._ignored_result_count += 1
+ continue
+
+ self._result_cache.append(result)
+
+ return True
+
+ def __getitem__(self, k):
+ """
+ Retrieves an item or slice from the set of results.
+ """
+ if not isinstance(k, (slice, six.integer_types)):
+ raise TypeError
+ assert ((not isinstance(k, slice) and (k >= 0))
+ or (isinstance(k, slice) and (k.start is None or k.start >= 0)
+ and (k.stop is None or k.stop >= 0))), \
+ "Negative indexing is not supported."
+
+ # Remember if it's a slice or not. We're going to treat everything as
+ # a slice to simplify the logic and pull out a single item at the end
+ # if needed.
+ if isinstance(k, slice):
+ is_slice = True
+ start = k.start
+
+ if k.stop is not None:
+ bound = int(k.stop)
+ else:
+ bound = None
+ else:
+ is_slice = False
+ start = k
+ bound = k + 1
+
+ # Check whether we need to populate more of the cache.
+ if len(self._result_cache) <= 0 or not self._cache_is_full():
+ try:
+ while (bound is None or len(self._result_cache) < bound) and not self._cache_is_full():
+ current_max = len(self._result_cache) + self._ignored_result_count
+ self._fill_cache(current_max, current_max + ITERATOR_LOAD_PER_QUERY)
+ except StopIteration:
+ # There's nothing left, even though the bound is higher.
+ pass
+
+ # Cache should be full enough for our needs.
+ if is_slice:
+ return self._result_cache[start:bound]
+ else:
+ return self._result_cache[start]
+
+ def load_all_queryset(self, model, queryset):
+ """
+ Allows for specifying a custom ``QuerySet`` that changes how ``load_all``
+ will fetch records for the provided model.
+
+ This is useful for post-processing the results from the query, enabling
+ things like adding ``select_related`` or filtering certain data.
+ """
+ clone = self._clone()
+ clone._load_all_querysets[model] = queryset
+ return clone
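+
+ # Illustrative usage sketch (the ``Note`` model and its 'author' relation
+ # are assumed):
+ #
+ #     sqs = RelatedSearchQuerySet().load_all().load_all_queryset(
+ #         Note, Note.objects.select_related('author'))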
+
+ def _clone(self, klass=None):
+ if klass is None:
+ klass = self.__class__
+
+ query = self.query._clone()
+ clone = klass(query=query)
+ clone._load_all = self._load_all
+ clone._load_all_querysets = self._load_all_querysets
+ return clone
diff --git a/haystack/routers.py b/haystack/routers.py
new file mode 100644
index 0000000..0a77e17
--- /dev/null
+++ b/haystack/routers.py
@@ -0,0 +1,18 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from haystack.constants import DEFAULT_ALIAS
+
+
+class BaseRouter(object):
+ # Reserved for future extension.
+ pass
+
+
+class DefaultRouter(BaseRouter):
+ def for_read(self, **hints):
+ return DEFAULT_ALIAS
+
+ def for_write(self, **hints):
+ return DEFAULT_ALIAS
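+
+
+# Illustrative sketch (not part of this module): a project could route reads
+# and writes to different connection aliases. The ``MasterRouter`` name and
+# the 'default'/'master' aliases are assumed for the example:
+#
+#     class MasterRouter(BaseRouter):
+#         def for_read(self, **hints):
+#             return 'default'
+#
+#         def for_write(self, **hints):
+#             return 'master'
+#
+# Custom routers are listed, in order, in the HAYSTACK_ROUTERS setting
+# (see docs/multiple_index.rst).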
diff --git a/haystack/signals.py b/haystack/signals.py
new file mode 100644
index 0000000..63a6c5f
--- /dev/null
+++ b/haystack/signals.py
@@ -0,0 +1,90 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django.db import models
+
+from haystack.exceptions import NotHandled
+
+
+class BaseSignalProcessor(object):
+ """
+ A convenient way to attach Haystack to Django's signals and cause content
+ to be indexed.
+
+ By default, does nothing with signals but provides underlying functionality.
+ """
+ def __init__(self, connections, connection_router):
+ self.connections = connections
+ self.connection_router = connection_router
+ self.setup()
+
+ def setup(self):
+ """
+ A hook for setting up anything necessary for
+ ``handle_save/handle_delete`` to be executed.
+
+ Default behavior is to do nothing (``pass``).
+ """
+ # Do nothing.
+ pass
+
+ def teardown(self):
+ """
+ A hook for tearing down anything necessary for
+ ``handle_save/handle_delete`` to no longer be executed.
+
+ Default behavior is to do nothing (``pass``).
+ """
+ # Do nothing.
+ pass
+
+ def handle_save(self, sender, instance, **kwargs):
+ """
+ Given an individual model instance, determine which backends the
+ update should be sent to & update the object on those backends.
+ """
+ using_backends = self.connection_router.for_write(instance=instance)
+
+ for using in using_backends:
+ try:
+ index = self.connections[using].get_unified_index().get_index(sender)
+ index.update_object(instance, using=using)
+ except NotHandled:
+ # TODO: Maybe log it or let the exception bubble?
+ pass
+
+ def handle_delete(self, sender, instance, **kwargs):
+ """
+ Given an individual model instance, determine which backends the
+ delete should be sent to & delete the object on those backends.
+ """
+ using_backends = self.connection_router.for_write(instance=instance)
+
+ for using in using_backends:
+ try:
+ index = self.connections[using].get_unified_index().get_index(sender)
+ index.remove_object(instance, using=using)
+ except NotHandled:
+ # TODO: Maybe log it or let the exception bubble?
+ pass
+
+
+class RealtimeSignalProcessor(BaseSignalProcessor):
+ """
+ Allows for observing when saves/deletes fire & automatically updates the
+ search engine appropriately.
+ """
+ def setup(self):
+ # Naive (listen to all model saves).
+ models.signals.post_save.connect(self.handle_save)
+ models.signals.post_delete.connect(self.handle_delete)
+ # A more efficient approach would be to go through all backends, collect
+ # the models being used, then hook up signals only for those.
+
+ def teardown(self):
+ # Naive (stop listening to all model saves).
+ models.signals.post_save.disconnect(self.handle_save)
+ models.signals.post_delete.disconnect(self.handle_delete)
+ # A more efficient approach would be to go through all backends, collect
+ # the models being used, then disconnect signals only for those.
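+
+
+# Illustrative configuration sketch (not executed here): the processor in use
+# is selected via the HAYSTACK_SIGNAL_PROCESSOR setting, e.g. in settings.py:
+#
+#     HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
+#
+# Leaving the setting at its default keeps the do-nothing BaseSignalProcessor
+# behaviour (see docs/signal_processors.rst).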
diff --git a/haystack/templates/panels/haystack.html b/haystack/templates/panels/haystack.html
new file mode 100644
index 0000000..1896658
--- /dev/null
+++ b/haystack/templates/panels/haystack.html
@@ -0,0 +1,33 @@
+{% load i18n %}
+<table>
+ <thead>
+ <tr>
+ <th style="width: 50%">{% trans 'Query' %}</th>
+ <th style="width: 10%">{% trans 'Backend Alias' %}</th>
+ <th style="width: 25%">{% trans 'Timeline' %}</th>
+ <th style="width: 5%">{% trans 'Time' %}&nbsp;(ms)</th>
+ <th style="width: 10%">{% trans 'Kwargs' %}</th>
+ </tr>
+ </thead>
+ <tbody>
+ {% for query in queries %}
+ <tr class="{% cycle 'djDebugOdd' 'djDebugEven' %}">
+ <td class="syntax">
+ <div class="djDebugSqlWrap">
+ <div class="djDebugSql">{{ query.query_string|safe }}</div>
+ </div>
+ </td>
+ <td>{{ query.alias }}</td>
+ <td>
+ <span class="djDebugLineChart{% if query.is_slow %} djDebugLineChartWarning{% endif %}" style="width:{{ query.width_ratio }}%; left:{{ query.start_offset }}%; position: relative;">&nbsp;</span>
+ </td>
+ <td>{{ query.time }}</td>
+ <td>
+ {% for key, value in query.additional_kwargs.items %}
+ <strong>'{{ key }}':</strong> {{ value|stringformat:"r" }}<br>
+ {% endfor %}
+ </td>
+ </tr>
+ {% endfor %}
+ </tbody>
+</table>
diff --git a/haystack/templates/search_configuration/solr.xml b/haystack/templates/search_configuration/solr.xml
new file mode 100644
index 0000000..03fed3b
--- /dev/null
+++ b/haystack/templates/search_configuration/solr.xml
@@ -0,0 +1,166 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="default" version="1.5">
+ <types>
+ <fieldtype name="string" class="solr.StrField" sortMissingLast="true" omitNorms="true"/>
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" omitNorms="true"/>
+ <fieldtype name="binary" class="solr.BinaryField"/>
+
+ <!-- Numeric field types that manipulate the value into
+ a string value that isn't human-readable in its internal form,
+ but with a lexicographic ordering the same as the numeric ordering,
+ so that range queries work correctly. -->
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" omitNorms="true" sortMissingLast="true" positionIncrementGap="0"/>
+ <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
+ <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
+ <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
+ <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
+
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
+
+ <fieldType name="date" class="solr.TrieDateField" omitNorms="true" precisionStep="0" positionIncrementGap="0"/>
+ <!-- A Trie based date field for faster date range queries and date faceting. -->
+ <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" precisionStep="6" positionIncrementGap="0"/>
+
+ <fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>
+ <fieldType name="location" class="solr.LatLonType" subFieldSuffix="_coordinate"/>
+ <fieldtype name="geohash" class="solr.GeoHashField"/>
+
+ <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ enablePositionIncrements="true"
+ />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
+ -->
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ enablePositionIncrements="true"
+ />
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPossessiveFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+ <filter class="solr.EnglishMinimalStemFilterFactory"/>
+ -->
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="ngram" class="solr.TextField" >
+ <analyzer type="index">
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="15" />
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="edge_ngram" class="solr.TextField" positionIncrementGap="1">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <filter class="solr.LowerCaseFilterFactory" />
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+ <filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="15" side="front" />
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.WhitespaceTokenizerFactory" />
+ <filter class="solr.LowerCaseFilterFactory" />
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+ </analyzer>
+ </fieldType>
+ </types>
+
+ <fields>
+ <!-- general -->
+ <field name="{{ ID }}" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
+ <field name="{{ DJANGO_CT }}" type="string" indexed="true" stored="true" multiValued="false"/>
+ <field name="{{ DJANGO_ID }}" type="string" indexed="true" stored="true" multiValued="false"/>
+ <field name="_version_" type="long" indexed="true" stored ="true"/>
+
+ <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
+ <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
+ <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
+ <dynamicField name="*_t" type="text_en" indexed="true" stored="true"/>
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
+ <dynamicField name="*_f" type="float" indexed="true" stored="true"/>
+ <dynamicField name="*_d" type="double" indexed="true" stored="true"/>
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+ <dynamicField name="*_p" type="location" indexed="true" stored="true"/>
+ <dynamicField name="*_coordinate" type="tdouble" indexed="true" stored="false"/>
+
+{% for field in fields %}
+ <field name="{{ field.field_name }}" type="{{ field.type }}" indexed="{{ field.indexed }}" stored="{{ field.stored }}" multiValued="{{ field.multi_valued }}" />
+{% endfor %}
+ </fields>
+
+ <!-- field to use to determine and enforce document uniqueness. -->
+ <uniqueKey>{{ ID }}</uniqueKey>
+
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
+ <defaultSearchField>{{ content_field_name }}</defaultSearchField>
+
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
+ <solrQueryParser defaultOperator="{{ default_operator }}"/>
+</schema>
diff --git a/haystack/templatetags/__init__.py b/haystack/templatetags/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/haystack/templatetags/__init__.py
diff --git a/haystack/templatetags/highlight.py b/haystack/templatetags/highlight.py
new file mode 100644
index 0000000..f1ca697
--- /dev/null
+++ b/haystack/templatetags/highlight.py
@@ -0,0 +1,119 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django import template
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+from django.utils import six
+
+from haystack.utils import importlib
+
+
+register = template.Library()
+
+
+class HighlightNode(template.Node):
+ def __init__(self, text_block, query, html_tag=None, css_class=None, max_length=None):
+ self.text_block = template.Variable(text_block)
+ self.query = template.Variable(query)
+ self.html_tag = html_tag
+ self.css_class = css_class
+ self.max_length = max_length
+
+ if html_tag is not None:
+ self.html_tag = template.Variable(html_tag)
+
+ if css_class is not None:
+ self.css_class = template.Variable(css_class)
+
+ if max_length is not None:
+ self.max_length = template.Variable(max_length)
+
+ def render(self, context):
+ text_block = self.text_block.resolve(context)
+ query = self.query.resolve(context)
+ kwargs = {}
+
+ if self.html_tag is not None:
+ kwargs['html_tag'] = self.html_tag.resolve(context)
+
+ if self.css_class is not None:
+ kwargs['css_class'] = self.css_class.resolve(context)
+
+ if self.max_length is not None:
+ kwargs['max_length'] = self.max_length.resolve(context)
+
+ # Handle a user-defined highlighting function.
+ if hasattr(settings, 'HAYSTACK_CUSTOM_HIGHLIGHTER') and settings.HAYSTACK_CUSTOM_HIGHLIGHTER:
+ # Do the import dance.
+ try:
+ path_bits = settings.HAYSTACK_CUSTOM_HIGHLIGHTER.split('.')
+ highlighter_path, highlighter_classname = '.'.join(path_bits[:-1]), path_bits[-1]
+ highlighter_module = importlib.import_module(highlighter_path)
+ highlighter_class = getattr(highlighter_module, highlighter_classname)
+ except (ImportError, AttributeError) as e:
+ raise ImproperlyConfigured("The highlighter '%s' could not be imported: %s" % (settings.HAYSTACK_CUSTOM_HIGHLIGHTER, e))
+ else:
+ from haystack.utils import Highlighter
+ highlighter_class = Highlighter
+
+ highlighter = highlighter_class(query, **kwargs)
+ highlighted_text = highlighter.highlight(text_block)
+ return highlighted_text
+
+
+@register.tag
+def highlight(parser, token):
+ """
+ Takes a block of text and highlights words from a provided query within
+ that block of text. Optionally accepts arguments providing the HTML tag
+ to wrap highlighted words in, a CSS class to use with that tag, and a
+ maximum length for the blurb in characters.
+
+ Syntax::
+
+ {% highlight <text_block> with <query> [css_class "class_name"] [html_tag "span"] [max_length 200] %}
+
+ Example::
+
+ # Highlight summary with default behavior.
+ {% highlight result.summary with request.query %}
+
+ # Highlight summary but wrap highlighted words with a div and the
+ # following CSS class.
+ {% highlight result.summary with request.query html_tag "div" css_class "highlight_me_please" %}
+
+ # Highlight summary but only show 40 characters.
+ {% highlight result.summary with request.query max_length 40 %}
+ """
+ bits = token.split_contents()
+ tag_name = bits[0]
+
+ if len(bits) % 2 != 0:
+ raise template.TemplateSyntaxError(u"'%s' tag requires valid argument pairings." % tag_name)
+
+ text_block = bits[1]
+
+ if len(bits) < 4:
+ raise template.TemplateSyntaxError(u"'%s' tag requires an object and a query provided by 'with'." % tag_name)
+
+ if bits[2] != 'with':
+ raise template.TemplateSyntaxError(u"'%s' tag's second argument should be 'with'." % tag_name)
+
+ query = bits[3]
+
+ arg_bits = iter(bits[4:])
+ kwargs = {}
+
+ for bit in arg_bits:
+ if bit == 'css_class':
+ kwargs['css_class'] = six.next(arg_bits)
+
+ if bit == 'html_tag':
+ kwargs['html_tag'] = six.next(arg_bits)
+
+ if bit == 'max_length':
+ kwargs['max_length'] = six.next(arg_bits)
+
+ return HighlightNode(text_block, query, **kwargs)
diff --git a/haystack/templatetags/more_like_this.py b/haystack/templatetags/more_like_this.py
new file mode 100644
index 0000000..edad111
--- /dev/null
+++ b/haystack/templatetags/more_like_this.py
@@ -0,0 +1,108 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django import template
+from django.db import models
+
+from haystack.query import SearchQuerySet
+
+register = template.Library()
+
+
+class MoreLikeThisNode(template.Node):
+ def __init__(self, model, varname, for_types=None, limit=None):
+ self.model = template.Variable(model)
+ self.varname = varname
+ self.for_types = for_types
+ self.limit = limit
+
+ if self.limit is not None:
+ self.limit = int(self.limit)
+
+ def render(self, context):
+ try:
+ model_instance = self.model.resolve(context)
+ sqs = SearchQuerySet()
+
+ if self.for_types is not None:
+ intermediate = template.Variable(self.for_types)
+ for_types = intermediate.resolve(context).split(',')
+ search_models = []
+
+ for model in for_types:
+ model_class = models.get_model(*model.split('.'))
+
+ if model_class:
+ search_models.append(model_class)
+
+ sqs = sqs.models(*search_models)
+
+ sqs = sqs.more_like_this(model_instance)
+
+ if self.limit is not None:
+ sqs = sqs[:self.limit]
+
+ context[self.varname] = sqs
+ except Exception:
+ # Fail silently so a bad lookup never breaks template rendering.
+ pass
+
+ return ''
+
+
+@register.tag
+def more_like_this(parser, token):
+ """
+ Fetches similar items from the search index to find content that is similar
+ to the provided model's content.
+
+ Syntax::
+
+ {% more_like_this model_instance as varname [for app_label.model_name,app_label.model_name,...] [limit n] %}
+
+ Example::
+
+ # Pull a full SearchQuerySet (lazy loaded) of similar content.
+ {% more_like_this entry as related_content %}
+
+ # Pull just the top 5 similar pieces of content.
+ {% more_like_this entry as related_content limit 5 %}
+
+ # Pull just the top 5 similar entries or comments.
+ {% more_like_this entry as related_content for "blog.entry,comments.comment" limit 5 %}
+ """
+ bits = token.split_contents()
+
+ if len(bits) not in (4, 6, 8):
+ raise template.TemplateSyntaxError(u"'%s' tag requires either 3, 5 or 7 arguments." % bits[0])
+
+ model = bits[1]
+
+ if bits[2] != 'as':
+ raise template.TemplateSyntaxError(u"'%s' tag's second argument should be 'as'." % bits[0])
+
+ varname = bits[3]
+ limit = None
+ for_types = None
+
+ if len(bits) == 6:
+ if bits[4] != 'limit' and bits[4] != 'for':
+ raise template.TemplateSyntaxError(u"'%s' tag's fourth argument should be either 'limit' or 'for'." % bits[0])
+
+ if bits[4] == 'limit':
+ limit = bits[5]
+ else:
+ for_types = bits[5]
+
+ if len(bits) == 8:
+ if bits[4] != 'for':
+ raise template.TemplateSyntaxError(u"'%s' tag's fourth argument should be 'for'." % bits[0])
+
+ for_types = bits[5]
+
+ if bits[6] != 'limit':
+ raise template.TemplateSyntaxError(u"'%s' tag's sixth argument should be 'limit'." % bits[0])
+
+ limit = bits[7]
+
+ return MoreLikeThisNode(model, varname, for_types, limit)
diff --git a/haystack/urls.py b/haystack/urls.py
new file mode 100644
index 0000000..4b96615
--- /dev/null
+++ b/haystack/urls.py
@@ -0,0 +1,16 @@
+# encoding: utf-8
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from haystack.views import SearchView
+
+try:
+ from django.conf.urls import patterns, url
+except ImportError:
+ from django.conf.urls.defaults import patterns, url
+
+
+
+urlpatterns = patterns('haystack.views',
+ url(r'^$', SearchView(), name='haystack_search'),
+)
diff --git a/haystack/utils/__init__.py b/haystack/utils/__init__.py
new file mode 100644
index 0000000..659f2f5
--- /dev/null
+++ b/haystack/utils/__init__.py
@@ -0,0 +1,88 @@
+# encoding: utf-8
+
+from __future__ import unicode_literals
+import re
+
+import django
+from django.conf import settings
+from django.utils import six
+
+from haystack.constants import ID, DJANGO_CT, DJANGO_ID
+from haystack.utils.highlighting import Highlighter
+
+try:
+ # Introduced in Python 2.7
+ import importlib
+except ImportError:
+ # Deprecated in Django 1.8; removed in Django 1.9 (both of which require
+ # at least Python 2.7)
+ from django.utils import importlib
+
+IDENTIFIER_REGEX = re.compile(r'^[\w\d_]+\.[\w\d_]+\.\d+$')
+
+
+def default_get_identifier(obj_or_string):
+ """
+ Get a unique identifier for the object or a string representing the
+ object.
+
+ If not overridden, uses <app_label>.<object_name>.<pk>.
+ """
+ if isinstance(obj_or_string, six.string_types):
+ if not IDENTIFIER_REGEX.match(obj_or_string):
+ raise AttributeError(u"Provided string '%s' is not a valid identifier." % obj_or_string)
+
+ return obj_or_string
+
+ return u"%s.%s" % (get_model_ct(obj_or_string),
+ obj_or_string._get_pk_val())
+
+
+def _lookup_identifier_method():
+ """
+ If the user has set HAYSTACK_IDENTIFIER_METHOD, import it and return the method uncalled.
+ If HAYSTACK_IDENTIFIER_METHOD is not defined, return haystack.utils.default_get_identifier.
+
+ This always runs at module import time. We keep the code in a function
+ so that it can be called from unit tests, in order to simulate the re-loading
+ of this module.
+ """
+ if not hasattr(settings, 'HAYSTACK_IDENTIFIER_METHOD'):
+ return default_get_identifier
+
+ module_path, method_name = settings.HAYSTACK_IDENTIFIER_METHOD.rsplit(".", 1)
+
+ try:
+ module = importlib.import_module(module_path)
+ except ImportError:
+ raise ImportError(u"Unable to import module '%s' provided for HAYSTACK_IDENTIFIER_METHOD." % module_path)
+
+ identifier_method = getattr(module, method_name, None)
+
+ if not identifier_method:
+ raise AttributeError(
+ u"Provided method '%s' for HAYSTACK_IDENTIFIER_METHOD does not exist in '%s'." % (method_name, module_path)
+ )
+
+ return identifier_method
+
+
+get_identifier = _lookup_identifier_method()
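+
+# Illustrative configuration sketch (the module path is assumed): a project
+# can point HAYSTACK_IDENTIFIER_METHOD at its own callable in settings.py:
+#
+#     HAYSTACK_IDENTIFIER_METHOD = 'myproject.utils.custom_get_identifier'
+#
+# The callable should accept an object or string and return an identifier in
+# the same "<app_label>.<model>.<pk>" form that default_get_identifier emits.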
+
+
+if django.VERSION >= (1, 6):
+ def get_model_ct_tuple(model):
+ return (model._meta.app_label, model._meta.model_name)
+else:
+ def get_model_ct_tuple(model):
+ return (model._meta.app_label, model._meta.module_name)
+
+def get_model_ct(model):
+ return "%s.%s" % get_model_ct_tuple(model)
+
+
+def get_facet_field_name(fieldname):
+ if fieldname in [ID, DJANGO_ID, DJANGO_CT]:
+ return fieldname
+
+ return "%s_exact" % fieldname
diff --git a/haystack/utils/app_loading.py b/haystack/utils/app_loading.py
new file mode 100755
index 0000000..efba6ad
--- /dev/null
+++ b/haystack/utils/app_loading.py
@@ -0,0 +1,90 @@
+# encoding: utf-8
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from django import VERSION as DJANGO_VERSION
+from django.conf import settings
+from django.core.exceptions import ImproperlyConfigured
+
+from haystack.utils import importlib
+
+__all__ = ['haystack_get_models', 'haystack_load_apps']
+
+APP = 'app'
+MODEL = 'model'
+
+if DJANGO_VERSION >= (1, 7):
+ from django.apps import apps
+
+ def haystack_get_app_modules():
+ """Return the Python module for each installed app"""
+ return [i.module for i in apps.get_app_configs()]
+
+ def haystack_load_apps():
+ """Return a list of app labels for all installed applications which have models"""
+ return [i.label for i in apps.get_app_configs() if i.models_module is not None]
+
+ def haystack_get_models(label):
+ try:
+ app_mod = apps.get_app_config(label)
+ return app_mod.get_models()
+ except LookupError:
+ if '.' not in label:
+ raise ImproperlyConfigured('Unknown application label {}'.format(label))
+ app_label, model_name = label.rsplit('.', 1)
+ return [apps.get_model(app_label, model_name)]
+ except ImproperlyConfigured:
+ pass
+
+ def haystack_get_model(app_label, model_name):
+ return apps.get_model(app_label, model_name)
+
+else:
+ from django.db.models.loading import get_app, get_model, get_models
+
+ def is_app_or_model(label):
+ label_bits = label.split('.')
+
+ if len(label_bits) == 1:
+ return APP
+ elif len(label_bits) == 2:
+ try:
+ get_model(*label_bits)
+ except LookupError:
+ return APP
+ return MODEL
+ else:
+ raise ImproperlyConfigured(
+ "'%s' isn't recognized as an app (<app_label>) or model (<app_label>.<model_name>)." % label)
+
+ def haystack_get_app_modules():
+ """Return the Python module for each installed app"""
+ return [importlib.import_module(i) for i in settings.INSTALLED_APPS]
+
+ def haystack_load_apps():
+ # Do all, in an INSTALLED_APPS sorted order.
+ items = []
+
+ for app in settings.INSTALLED_APPS:
+ app_label = app.split('.')[-1]
+
+ try:
+ get_app(app_label)
+ except ImproperlyConfigured:
+ continue # Intentionally allow e.g. apps without models.py
+
+ items.append(app_label)
+
+ return items
+
+ def haystack_get_models(label):
+ app_or_model = is_app_or_model(label)
+
+ if app_or_model == APP:
+ app_mod = get_app(label)
+ return get_models(app_mod)
+ else:
+ app_label, model_name = label.rsplit('.', 1)
+ return [get_model(app_label, model_name)]
+
+ def haystack_get_model(app_label, model_name):
+ return get_model(app_label, model_name)
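+
+
+# Illustrative usage sketch (the app and model labels are assumed):
+#
+#     haystack_load_apps()               # e.g. ['auth', 'blog', ...]
+#     haystack_get_models('blog')        # all models in the 'blog' app
+#     haystack_get_models('blog.Entry')  # just the Entry model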
diff --git a/haystack/utils/geo.py b/haystack/utils/geo.py
new file mode 100644
index 0000000..d3b87da
--- /dev/null
+++ b/haystack/utils/geo.py
@@ -0,0 +1,78 @@
+# encoding: utf-8