pylucene-4.10.1-1/CHANGES

Version 4.9.0 -> 4.10.1
-----------------------
 - using Lucene 4.10.1 sources
 - PyLucene built with JCC 2.21

Version 4.8.0 -> 4.9.0
----------------------
 - using Lucene 4.9.0 sources
 - FacetSample.py fixed to work with Lucene 4.9 facets API (Thomas Koch)
 - PyLucene built with JCC 2.20

Version 4.7.2 -> 4.8.0
----------------------
 - using Lucene 4.8.0 sources
 - PyLucene built with JCC 2.19
 - Lucene now requires Java 7 at the minimum, Java 6 is no longer supported

Version 4.6.1 -> 4.7.2
----------------------
 - using Lucene 4.7.2 sources
 - PyLucene built with JCC 2.19

Version 4.5.1 -> 4.6.1
----------------------
 - using Lucene 4.6.1 sources
 - PyLucene built with JCC 2.19

Version 4.4.0 -> 4.5.1
----------------------
 - using Lucene 4.5.1 sources
 - PyLucene built with JCC 2.18

Version 4.3.0 -> 4.4.0
----------------------
 - added vmargs=['-Djava.awt.headless=true'] to all initVM() calls
 - using Lucene 4.4.0 sources
 - added wrapping of Polish analyzer and stemmer
 - added inclusion of misc.jar because of cross-dependencies
 - PyLucene built with JCC 2.17

Version 3.6.2 -> 4.3.0
----------------------
 - switched build to --use_full_names, Python wrappers now follow Java packages
 - removed all --rename and most --exclude entries from jcc command line
 - removed Lucene in Action samples as they're incompatible with the 4.x API
 - migrated all unit tests and remaining samples to Lucene 4.x API
 - migrated FacetExample.py to latest 4.x facets API (Thomas Koch)
 - PyLucene built with JCC 2.16

Version 3.6.1 -> 3.6.2
----------------------
 - using Lucene 3.6.2 sources
 - PyLucene built with JCC 2.15

Version 3.6.0 -> 3.6.1
----------------------
 - using Lucene 3.6.1 sources
 - PyLucene built with JCC 2.14

Version 3.5.0 -> 3.6.0
----------------------
 - using Lucene 3.6.0 sources
 - renamed classes whose python name would not be unique in lucene module
 - refreshed Linux build options, added an OpenJDK 7 example
 - added JavaList to collections.py, a Python java.util.List (Thomas Koch)
 - added samples/FacetExample.py (Thomas Koch)
 - PyLucene built with JCC 2.13

Version 3.4 -> 3.5.0
--------------------
 - using Lucene 3.5 sources
 - added facet contrib module to build
 - refreshed SynonymAnalyzerViewer sample and wordnet index (Thomas Koch)
 - added PythonReusableAnalyzerBase (Michael McCandless)
 - added PythonIndexDeletionPolicy.java (Michael McCandless)
 - added spellchecker contrib module to build
 - PyLucene built with JCC 2.12

Version 3.3 -> 3.4
------------------
 - added new join contrib module to build
 - PyLucene built with JCC 2.11

Version 3.2 -> 3.3
------------------
 - using Lucene 3.3 sources
 - adapted to FieldComparator becoming generic
 - added new grouping contrib module to build
 - PyLucene built with JCC 2.10

Version 3.1.0 -> 3.2
--------------------
 - using Lucene 3.2 sources
 - PyLucene built with JCC 2.9
 - rearranged Lucene source checkout tree to reflect new constraints

Version 3.0.0 -> 3.1.0
----------------------
 - using Lucene 3.1 sources
 - improved support for building on Windows with mingw32
 - added wininst target to Makefile
 - added port of ICUNormalizer2Filter using C++ ICU's Normalizer2 via PyICU 1.1
 - added port of ICUFoldingFilter using C++ ICU's Normalizer2 via PyICU 1.1
 - added port of ICUTransformFilter using C++ ICU's Transliterator via PyICU 1.1
 - fixed "Lucene in Action" samples left over on old API
 - improved support for adding optional contrib modules
 - added --package java.util.regex to wrap constructors on PatternAnalyzer
 - fixed mansearch.py sample to reflect API changes
 - PyLucene built with JCC 2.8

Version 2.9.0 -> 3.0.0
----------------------
 - unit tests ported to new API
 - removed InstantiatedIndex contrib from default build
 - with JCC 2.5's Java generics support, a lot less downcasting needed
 - Java Lucene sources now included in PyLucene source distribution
 - "Lucene in Action" samples and tests converted to new Lucene 3.0 API
 - PyLucene built with JCC 2.5

Version 2.4.1 -> 2.9.0
----------------------
 - renamed the Highlighter's SpanScorer class to HighlighterSpanScorer
 - fixed bug in Makefile's test target which tested installed build
 - added Mac OS X 10.6 sections to Makefile
 - added FieldCache.Parser Python extension classes (used in test/test_Sort.py)
 - added FieldComparator and FieldComparatorSource Python extension classes
 - added 'memory' contrib module to default build
 - PyLucene built with JCC 2.4

Version 2.4.0 -> 2.4.1
----------------------
 - PyLucene with JCC now a subproject of the Apache Lucene project
 - documentation moved to http://lucene.apache.org/pylucene
 - added java.util.Arrays to the build to bridge the Java array/collection gap
 - added collections.py module with JavaSet class, a Python java.util.Set
 - fixed bug in PythonQueryParser overriding wrong method (Aaron Lav)
 - PyLucene built with JCC 2.2
 - fixed bug with collections.py shadowing Python 2.6's during build
 - passing strings for byte[] or char[] is no longer supported, use JArray
 - added copy of PyLucene web site to distribution for offline viewing

Version 2.3.2 -> 2.4.0
----------------------
 - fixed Debian bug http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=499599
 - arrays are now wrapped with JArray() instances instead of expanded into lists
 - return by value in arrays now supported
 - PythonTermDocs removed since arrays can now receive values
 - PythonReader removed since arrays now wrapped
 - added InstantiatedIndex contrib to build
 - PyLucene built with JCC 2.1

Version 2.3.1 -> 2.3.2
----------------------
 - fixed code generation for clone() broken by finalization proxy work
 - added 'union' and 'NULL' to the list of reserved words
 - fixed castCheck() to work with finalization proxies
 - added scorePayload() delegator to PythonSimilarityDelegator
 - added support for --install-dir and --use-distutils options
 - added support for INSTALL_OPT to Makefile
 - fixed basic samples to initialize VM
 - added bdist target to Makefile

Version 2.3 -> 2.3.1
--------------------
 - fixed bug in JCC using the wrong field modifiers for setter (Bill Janssen)
 - added missing calls for generating wrappers for ancestors of Exception
 - added missing call for generating wrappers for String
 - added PythonTokenizer for implementing complete tokenizers in Python

Version 2.2 -> 2.3
------------------
 - PyLucene with JCC introduced
 - added support for Python 2.3.5
 - added support for using clone() with extensions
 - renamed decRef() (and incRef()) native extensions method to pythonDecRef()
 - improved error reporting a bit
 - JCC now generates Python properties for get/set/is methods
 - fixed bug in generated code invoking parent method when inherited from above
 - added support for building on 64-bit Linux (Ubuntu 7.10)
 - added support for implicitly iterable Enumeration
 - added support for --root and --prefix for jcc invocations (Esteve Fernandez)
 - jcc switched to setuptools by default (and fallback on distutils)
 - fixed bug http://bugzilla.osafoundation.org/show_bug.cgi?id=11643
 - added support for automatic boxing of primitives when Object is expected
 - fixed bug in missing extensions' Iterator and Enumeration methods
 - added JavaSet.py sample using PythonSet and PythonIterator extensions
 - added missing LICENSE files
 - fixed memory leak when calling inherited methods via callSuper()
 - made finalize() method public on extensions for manually breaking ref cycle
 - added support for building on Solaris with Sun Studio C++ (Solaris 11)
 - fixed leak of local refs of jstring when converting to an array of String
 - automated finalization of extensions via proxy for breaking ref cycle
 - added Py_CLEAR and Py_VISIT macros for Python 2.3.5 compilation
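Two of the API points noted in the changelog come up in nearly every PyLucene program: initVM(), called with the headless vmargs added in 4.4.0, and JArray, which replaced passing plain strings for byte[]/char[] arguments in 2.4.1. A minimal sketch, not part of the distribution; the VERSION attribute is assumed to be the version string baked into the module by JCC's --version flag:

# Start the embedded JVM once per process, headless as in the samples.
import lucene
from lucene import JArray

lucene.initVM(vmargs=['-Djava.awt.headless=true'])
print lucene.VERSION        # assumed: version string generated by JCC

# Java byte[] arguments must be wrapped explicitly; a plain str is rejected.
data = JArray('byte')([72, 105, 0, -128])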
pylucene-4.10.1-1/CREDITS

PyLucene is a JCC-compiled Python extension of Java Lucene and wouldn't be
possible without the tireless efforts of the people and open source projects
below.

 - the Apache Lucene developers,
   http://lucene.apache.org/java/docs/whoweare.html
 - the Open Source Applications Foundation, for hosting the project from
   2004 to 2008: http://www.osafoundation.org
 - Andi Vajda, PyLucene and JCC project founder and maintainer, for believing
   that PyLucene should be feasible
 - the following people contributed patches, samples, bug reports and
   resources:
     . Kapil Thangavelu (hazmat): FSDirectory support, first unit test
     . Frank Wierzbicki: IndexFiles.py and SearchFiles.py samples
     . Andreas Jung: several bug reports, nasty bugs indeed
     . Jeff Bowden: several bug reports and API additions via patches
     . Wai Yip Tung: test_PyLuceneThread.py unit test, windows threads testing
     . Yura Smolsky: test_Highlighter.py unit test, numerous bug reports
     . Steve Jenson: MultiFieldQueryParser addition to test_PyLucene.py
     . Erik Hatcher: man page index and search samples
     . Bill Janssen: many bug reports and 'shared mode' suggestion
     . Aaron Lav: several memory leaks, fixed with patches and tests
     . Grant Ingersoll: for inviting and sponsoring PyLucene's move to Apache

Thank you all !

pylucene-4.10.1-1/extensions.xml

pylucene-4.10.1-1/INSTALL

Please see http://lucene.apache.org/pylucene/install.html

pylucene-4.10.1-1/LICENSE

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity.
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. 
You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.

8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.

9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.

END OF TERMS AND CONDITIONS

APPENDIX: How to apply the Apache License to your work.

To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives.

Copyright [yyyy] [name of copyright owner]

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

pylucene-4.10.1-1/Makefile

# Makefile for building PyLucene
#
# Supported operating systems: Mac OS X, Linux and Windows.
# See INSTALL file for requirements.
# See jcc/INSTALL for information about --shared.
#
# Steps to build
#   1. Edit the sections below as documented
#   2. Edit the JARS variable to add optional contrib modules not defaulted
#   3. make
#   4. make install
#
# The install target installs the lucene python extension in python's
# site-packages directory.
#

VERSION=4.10.1-1
LUCENE_SVN_VER=HEAD
LUCENE_VER=4.10.1
LUCENE_SVN=http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_4_10_1
PYLUCENE:=$(shell pwd)
LUCENE_SRC=lucene-java-$(LUCENE_VER)
LUCENE=$(LUCENE_SRC)/lucene

#
# You need to uncomment and edit the variables below in the section
# corresponding to your operating system.
#
# Windows drive-absolute paths need to be expressed cygwin style.
#
# PREFIX: where programs are normally installed on your system (Unix).
# PREFIX_PYTHON: where your version of python is installed.
# JCC: how jcc is invoked, depending on the python version:
#  - python 2.7:
#      $(PYTHON) -m jcc
#  - python 2.6:
#      $(PYTHON) -m jcc.__main__
#  - python 2.5:
#      $(PYTHON) -m jcc
#  - python 2.4:
#      $(PYTHON) $(PREFIX_PYTHON)/lib/python2.4/site-packages/jcc/__main__.py
# NUM_FILES is the number of wrapper files to generate. By default, jcc
# generates all C++ classes into one single file. This may exceed a compiler
# limit.
#

# Mac OS X 10.6 (64-bit Python 2.6, Java 1.6)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64
#NUM_FILES=8

# Mac OS X 10.6 (MacPorts 1.8.0 64-bit Python 2.7, Java 1.6)
#PREFIX_PYTHON=/opt/local
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc --shared --arch x86_64
#NUM_FILES=8

# Mac OS X 10.6 (64-bit and 32-bit Python 2.6 together, Java 1.6)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 --arch i386
#NUM_FILES=8

# Mac OS X 10.5 (32-bit Python 2.5, Java 1.5)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc --shared
#NUM_FILES=8

# Mac OS X (Python 2.3.5, Java 1.5, setuptools 0.6c7, Intel Mac OS X 10.4)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/site-packages/JCC-2.3-py2.3-macosx-10.4-i386.egg/jcc/__init__.py
#NUM_FILES=8

# Mac OS X (Python 2.3.5, Java 1.5, setuptools 0.6c7, PPC Mac OS X 10.4)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/site-packages/JCC-2.3-py2.3-macosx-10.4-ppc.egg/jcc/__init__.py
#NUM_FILES=8

# Linux (Ubuntu 11.10 64-bit, Python 2.7.2, OpenJDK 1.7, setuptools 0.6.16)
# Be sure to also set JDK['linux2'] in jcc's setup.py to the JAVA_HOME value
# used below for ANT (and rebuild jcc after changing it).
#PREFIX_PYTHON=/usr
#ANT=JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 /usr/bin/ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc --shared
#NUM_FILES=8

# Linux (Ubuntu 8.10 64-bit, Python 2.5.2, OpenJDK 1.6, setuptools 0.6c9)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc --shared
#NUM_FILES=8

# Linux (Ubuntu 6.06, Python 2.4, Java 1.5, no setuptools)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) $(PREFIX_PYTHON)/lib/python2.4/site-packages/jcc/__init__.py
#NUM_FILES=8

# FreeBSD
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc
#NUM_FILES=8

# Solaris (Solaris 11, Python 2.4 32-bit, Sun Studio 12, Java 1.6)
#PREFIX_PYTHON=/usr
#ANT=/usr/local/apache-ant-1.7.0/bin/ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) $(PREFIX_PYTHON)/lib/python2.4/site-packages/jcc/__init__.py
#NUM_FILES=8

# Windows (Win32, Python 2.5.1, Java 1.6, ant 1.7.0)
#PREFIX_PYTHON=/cygdrive/o/Python-2.5.2/PCbuild
#ANT=JAVA_HOME=o:\\Java\\jdk1.6.0_02 /cygdrive/o/java/apache-ant-1.7.0/bin/ant
#PYTHON=$(PREFIX_PYTHON)/python.exe
#JCC=$(PYTHON) -m jcc --shared
#NUM_FILES=8

# Windows (Win32, msys/MinGW, Python 2.6.4, Java 1.6, ant 1.7.1 (WinAnt))
#PREFIX_PYTHON=/c/Python26
#ANT=JAVA_HOME="c:\\Program Files\\Java\\jdk1.6.0_18" "/c/Program Files/WinAnt/bin/ant"
#PYTHON=$(PREFIX_PYTHON)/python.exe
#JCC=$(PYTHON) -m jcc.__main__ --shared --compiler mingw32
#NUM_FILES=8

# Windows (Win32, Python 2.7, Java 1.6, ant 1.8.1, Java not on PATH)
#PREFIX_PYTHON=/cygdrive/c/Python27
#ANT=JAVA_HOME=c:\\jdk1.6.0_22 /cygdrive/c/java/apache-ant-1.8.1/bin/ant
#PYTHON=$(PREFIX_PYTHON)/python.exe
#JCC=$(PYTHON) -m jcc --shared --find-jvm-dll
#NUM_FILES=8

JARS=$(LUCENE_JAR)

# comment/uncomment the desired/undesired optional contrib modules below
JARS+=$(ANALYZERS_JAR)          # many language analyzers
JARS+=$(MEMORY_JAR)             # single-document memory index
JARS+=$(HIGHLIGHTER_JAR)        # needs memory contrib
JARS+=$(EXTENSIONS_JAR)         # needs highlighter contrib
JARS+=$(QUERIES_JAR)            # regex and other contrib queries
JARS+=$(QUERYPARSER_JAR)        # query parser
JARS+=$(SANDBOX_JAR)            # needed by query parser
#JARS+=$(SMARTCN_JAR)           # smart Chinese analyzer
JARS+=$(STEMPEL_JAR)            # Polish analyzer and stemmer
#JARS+=$(SPATIAL_JAR)           # spatial lucene
JARS+=$(GROUPING_JAR)           # grouping module
JARS+=$(JOIN_JAR)               # join module
JARS+=$(FACET_JAR)              # facet module
JARS+=$(SUGGEST_JAR)            # suggest/spell module
JARS+=$(EXPRESSIONS_JAR)        # expressions module


#
# No edits required below
#

SVNOP?=export

ifeq ($(DEBUG),1)
  DEBUG_OPT=--debug
endif

DEFINES=-DPYLUCENE_VER="\"$(VERSION)\"" -DLUCENE_VER="\"$(LUCENE_VER)\""

LUCENE_JAR=$(LUCENE)/build/core/lucene-core-$(LUCENE_VER).jar
ANALYZERS_JAR=$(LUCENE)/build/analysis/common/lucene-analyzers-common-$(LUCENE_VER).jar
HIGHLIGHTER_JAR=$(LUCENE)/build/highlighter/lucene-highlighter-$(LUCENE_VER).jar
MEMORY_JAR=$(LUCENE)/build/memory/lucene-memory-$(LUCENE_VER).jar
EXTENSIONS_JAR=build/jar/extensions.jar
QUERIES_JAR=$(LUCENE)/build/queries/lucene-queries-$(LUCENE_VER).jar
QUERYPARSER_JAR=$(LUCENE)/build/queryparser/lucene-queryparser-$(LUCENE_VER).jar
SANDBOX_JAR=$(LUCENE)/build/sandbox/lucene-sandbox-$(LUCENE_VER).jar
SMARTCN_JAR=$(LUCENE)/build/analysis/smartcn/lucene-analyzers-smartcn-$(LUCENE_VER).jar
STEMPEL_JAR=$(LUCENE)/build/analysis/stempel/lucene-analyzers-stempel-$(LUCENE_VER).jar
SPATIAL_JAR=$(LUCENE)/build/spatial/lucene-spatial-$(LUCENE_VER).jar
GROUPING_JAR=$(LUCENE)/build/grouping/lucene-grouping-$(LUCENE_VER).jar
JOIN_JAR=$(LUCENE)/build/join/lucene-join-$(LUCENE_VER).jar
FACET_JAR=$(LUCENE)/build/facet/lucene-facet-$(LUCENE_VER).jar
SUGGEST_JAR=$(LUCENE)/build/suggest/lucene-suggest-$(LUCENE_VER).jar
EXPRESSIONS_JAR=$(LUCENE)/build/expressions/lucene-expressions-$(LUCENE_VER).jar
MISC_JAR=$(LUCENE)/build/misc/lucene-misc-$(LUCENE_VER).jar
ANTLR_JAR=$(LUCENE)/expressions/lib/antlr-runtime-3.5.jar
ASM_JAR=$(LUCENE)/expressions/lib/asm-4.1.jar
ASM_COMMONS_JAR=$(LUCENE)/expressions/lib/asm-commons-4.1.jar

ICUPKG:=$(shell which icupkg)

.PHONY: generate compile install default all clean realclean \
	sources ivy test jars distrib

default: all

$(LUCENE_SRC):
	svn $(SVNOP) --depth files -r $(LUCENE_SVN_VER) $(LUCENE_SVN) $(LUCENE_SRC)
	svn $(SVNOP) -r $(LUCENE_SVN_VER) $(LUCENE_SVN)/lucene $(LUCENE_SRC)/lucene

sources: $(LUCENE_SRC)

ivy:
ifeq ($(ANT),)
	$(error ANT is not defined, please edit Makefile as required at top)
else ifeq ($(PYTHON),)
	$(error PYTHON is not defined, please edit Makefile as required at top)
else ifeq ($(JCC),)
	$(error JCC is not defined, please edit Makefile as required at top)
else ifeq ($(NUM_FILES),)
	$(error NUM_FILES is not defined, please edit Makefile as required at top)
endif
	cd $(LUCENE); ($(ANT) ivy-availability-check || $(ANT) ivy-bootstrap)

to-orig: sources
	mkdir -p $(LUCENE)-orig
	tar -C $(LUCENE) -cf - . | tar -C $(LUCENE)-orig -xvf -

from-orig: $(LUCENE)-orig
	mkdir -p $(LUCENE)
	tar -C $(LUCENE)-orig -cf - . | tar -C $(LUCENE) -xvf -

lucene:
	rm -f $(LUCENE_JAR)
	$(MAKE) $(LUCENE_JAR)

$(LUCENE_JAR): $(LUCENE)
	cd $(LUCENE); $(ANT) -Dversion=$(LUCENE_VER)

$(ANALYZERS_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/analysis; $(ANT) -Dversion=$(LUCENE_VER) compile

$(MEMORY_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/memory; $(ANT) -Dversion=$(LUCENE_VER)

$(HIGHLIGHTER_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/highlighter; $(ANT) -Dversion=$(LUCENE_VER)

$(QUERIES_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/queries; $(ANT) -Dversion=$(LUCENE_VER)

$(QUERYPARSER_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/queryparser; $(ANT) -Dversion=$(LUCENE_VER)

$(SANDBOX_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/sandbox; $(ANT) -Dversion=$(LUCENE_VER)

$(EXTENSIONS_JAR): $(LUCENE_JAR)
	$(ANT) -f extensions.xml -Dlucene.dir=$(LUCENE_SRC)

$(SMARTCN_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/analysis/smartcn; $(ANT) -Dversion=$(LUCENE_VER)

$(STEMPEL_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/analysis/stempel; $(ANT) -Dversion=$(LUCENE_VER)

$(SPATIAL_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/spatial; $(ANT) -Dversion=$(LUCENE_VER)

$(GROUPING_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/grouping; $(ANT) -Dversion=$(LUCENE_VER)

$(JOIN_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/join; $(ANT) -Dversion=$(LUCENE_VER)

$(FACET_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/facet; $(ANT) -Dversion=$(LUCENE_VER)

$(SUGGEST_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/suggest; $(ANT) -Dversion=$(LUCENE_VER)

$(EXPRESSIONS_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/expressions; $(ANT) -Dversion=$(LUCENE_VER)

$(MISC_JAR): $(LUCENE_JAR)
	cd $(LUCENE)/misc; $(ANT) -Dversion=$(LUCENE_VER)

JCCFLAGS?=

jars: $(JARS) $(MISC_JAR) $(ANTLR_JAR) $(ASM_JAR) $(ASM_COMMONS)

ifneq ($(ICUPKG),)

ICURES= $(LUCENE)/analysis/icu/src/resources
RESOURCES=--resources $(ICURES)
ifneq ($(PYTHON),)
ENDIANNESS:=$(shell $(PYTHON) -c "import struct; print struct.pack('h', 1) == '\000\001' and 'b' or 'l'")
endif

resources: $(ICURES)/org/apache/lucene/analysis/icu/utr30.dat

$(ICURES)/org/apache/lucene/analysis/icu/utr30.dat: $(ICURES)/org/apache/lucene/analysis/icu/utr30.nrm
	rm -f $@
	cd $(dir $<); $(ICUPKG) --type $(ENDIANNESS) --add $(notdir $<) new $(notdir $@)

else

RESOURCES=

resources:
	@echo ICU not installed

endif
GENERATE=$(JCC) $(foreach jar,$(JARS),--jar $(jar)) \
	$(JCCFLAGS) --use_full_names \
	--include $(MISC_JAR) \
	--include $(ANTLR_JAR) \
	--include $(ASM_JAR) \
	--include $(ASM_COMMONS_JAR) \
	--package java.lang java.lang.System \
	                    java.lang.Runtime \
	--package java.util java.util.Arrays \
	                    java.util.Collections \
	                    java.util.HashMap \
	                    java.util.HashSet \
	                    java.util.TreeSet \
	                    java.lang.IllegalStateException \
	                    java.lang.IndexOutOfBoundsException \
	                    java.util.NoSuchElementException \
	                    java.text.SimpleDateFormat \
	                    java.text.DecimalFormat \
	                    java.text.Collator \
	--package java.util.concurrent java.util.concurrent.Executors \
	--package java.util.regex \
	--package java.io java.io.StringReader \
	                  java.io.InputStreamReader \
	                  java.io.FileInputStream \
	                  java.io.DataInputStream \
	--exclude org.apache.lucene.sandbox.queries.regex.JakartaRegexpCapabilities \
	--exclude org.apache.regexp.RegexpTunnel \
	--python lucene \
	--mapping org.apache.lucene.document.Document 'get:(Ljava/lang/String;)Ljava/lang/String;' \
	--mapping java.util.Properties 'getProperty:(Ljava/lang/String;)Ljava/lang/String;' \
	--sequence java.util.AbstractList 'size:()I' 'get:(I)Ljava/lang/Object;' \
	org.apache.lucene.index.IndexWriter:getReader \
	--version $(LUCENE_VER) \
	--module python/collections.py \
	--module python/ICUNormalizer2Filter.py \
	--module python/ICUFoldingFilter.py \
	--module python/ICUTransformFilter.py \
	$(RESOURCES) \
	--files $(NUM_FILES)

generate: jars
	$(GENERATE)

compile: jars
	$(GENERATE) --build $(DEBUG_OPT)

install: jars
	$(GENERATE) --install $(DEBUG_OPT) $(INSTALL_OPT)

bdist: jars
	$(GENERATE) --bdist

wininst: jars
	$(GENERATE) --wininst

all: sources ivy jars resources compile
	@echo build of $(PYLUCENE_LIB) complete

clean:
	if test -f $(LUCENE)/build.xml; then cd $(LUCENE); $(ANT) clean; fi
	rm -rf $(LUCENE)/build build

realclean:
	if test ! -d $(LUCENE_SRC)/.svn; then rm -rf $(LUCENE_SRC) lucene.egg-info; else rm -rf $(LUCENE)/build; fi
	rm -rf build

OS=$(shell uname)
BUILD_TEST:=$(PYLUCENE)/build/test

ifeq ($(findstring CYGWIN,$(OS)),CYGWIN)
BUILD_TEST:=`cygpath -aw $(BUILD_TEST)`
else
ifeq ($(findstring MINGW,$(OS)),MINGW)
BUILD_TEST:=`$(PYTHON) -c "import os, sys; print os.path.normpath(sys.argv[1]).replace(chr(92), chr(92)*2)" $(BUILD_TEST)`
endif
endif

install-test:
	mkdir -p $(BUILD_TEST)
	PYTHONPATH=$(BUILD_TEST) $(GENERATE) --install $(DEBUG_OPT) --install-dir $(BUILD_TEST)

test: install-test
	find test -name 'test_*.py' | PYTHONPATH=$(BUILD_TEST) xargs -t -n 1 $(PYTHON)

ARCHIVE=pylucene-$(VERSION)-src.tar.gz

distrib:
	mkdir -p distrib
	svn export --force . distrib/pylucene-$(VERSION)
	tar -cf - --exclude build $(LUCENE_SRC) | tar -C distrib/pylucene-$(VERSION) -xvf -
	cd distrib; tar --disable-copyfile -cvzf $(ARCHIVE) pylucene-$(VERSION)
	cd distrib; gpg2 --armor --output $(ARCHIVE).asc --detach-sig $(ARCHIVE)
	cd distrib; md5sum $(ARCHIVE) > $(ARCHIVE).md5

stage:
	cd distrib; scp -p $(ARCHIVE) $(ARCHIVE).asc $(ARCHIVE).md5 \
	    people.apache.org:public_html/staging_area

release:
	cd distrib; cp -p $(ARCHIVE) $(ARCHIVE).asc $(ARCHIVE).md5 ../../dist/pylucene/

print-%:
	@echo $* = $($*)
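The ENDIANNESS variable in the Makefile's ICU section shells out to Python to learn the host byte order that icupkg needs when rebuilding utr30.dat. The probe is plain Python 2 and can be tried standalone; this is just the Makefile's one-liner, unrolled:

# What the Makefile's ENDIANNESS probe computes: 'b' (big endian) if a
# native short packs its high byte first, else 'l' (little endian, e.g. x86).
import struct
print struct.pack('h', 1) == '\000\001' and 'b' or 'l'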
pylucene-4.10.1-1/NOTICE

Apache PyLucene
Copyright 2009-2013 The Apache Software Foundation

Copyright (c) 2004-2008 Open Source Applications Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

pylucene-4.10.1-1/README

Please see http://lucene.apache.org/pylucene/features.html

pylucene-4.10.1-1/test/BaseTestRangeFilter.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import lucene # so as to get 'org'

from random import seed, randint
from PyLuceneTestCase import PyLuceneTestCase

from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.document import Document, Field, StringField
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import Version


class BaseTestRangeFilter(PyLuceneTestCase):

    def __init__(self, *args):
        super(BaseTestRangeFilter, self).__init__(*args)

        #
        # Collation interacts badly with hyphens -- collation produces
        # different ordering than Unicode code-point ordering -- so two
        # indexes are created: one which can't have negative random
        # integers, for testing collated ranges, and the other which can
        # have negative random integers, for all other tests.
        #

        self.MAX_INT = 0x7fffffff

        class TestIndex(object):
            def __init__(_self, minR, maxR, allowNegativeRandomInts):
                _self.minR = minR
                _self.maxR = maxR
                _self.allowNegativeRandomInts = allowNegativeRandomInts
                _self.index = RAMDirectory()

        self.signedIndex = TestIndex(self.MAX_INT, ~self.MAX_INT, True)
        self.unsignedIndex = TestIndex(self.MAX_INT, 0, False)

        self.minId = 0
        self.maxId = 10000

        self.build(self.signedIndex)
        self.build(self.unsignedIndex)

    #
    # a simple padding function that should work with any int
    #
    def pad(self, n):
        if n < 0:
            return "-%0.10d" % (self.MAX_INT + n + 1)
        else:
            return "0%0.10d" % n

    def build(self, index):
        writer = self.getWriter(directory=index.index,
                                analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT))

        seed(101)
        for d in xrange(self.minId, self.maxId + 1):
            doc = Document()
            doc.add(Field("id", self.pad(d), StringField.TYPE_STORED))
            if index.allowNegativeRandomInts:
                r = randint(~self.MAX_INT, self.MAX_INT)
            else:
                r = randint(0, self.MAX_INT)

            if index.maxR < r:
                index.maxR = r
            if r < index.minR:
                index.minR = r

            doc.add(Field("rand", self.pad(r), StringField.TYPE_STORED))
            doc.add(Field("body", "body", StringField.TYPE_STORED))
            writer.addDocument(doc)

        writer.commit()
        writer.close()

    def testPad(self):
        tests = [-9999999, -99560, -100, -3, -1, 0,
                 3, 9, 10, 1000, 999999999]

        for i in xrange(0, len(tests) - 1):
            a = tests[i]
            b = tests[i + 1]
            aa = self.pad(a)
            bb = self.pad(b)
            label = "%s:%s vs %s:%s" % (a, aa, b, bb)
            self.assertEqual(len(aa), len(bb), "length of %s" % label)
            self.assert_(aa < bb, "compare less than %s" % label)
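The pad() helper above makes lexicographic string order agree with numeric order: every value is printed at a fixed width, and negatives are rebased onto [0, MAX_INT] behind a '-' prefix, which sorts before '0'. A standalone illustration of the values it produces (not part of the test suite):

# Equal-width strings whose string order matches the numeric order.
MAX_INT = 0x7fffffff

def pad(n):
    if n < 0:
        return "-%0.10d" % (MAX_INT + n + 1)
    return "0%0.10d" % n

print pad(-3)    # '-2147483645'
print pad(-1)    # '-2147483647'
print pad(0)     # '00000000000'
print pad(1000)  # '00000001000'
assert pad(-3) < pad(-1) < pad(0) < pad(1000)  # same order as -3 < -1 < 0 < 1000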
pylucene-4.10.1-1/test/BaseTokenStreamTestCase.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

from unittest import TestCase, main
from lucene import JArray

from java.io import StringReader
from java.lang import Boolean
from org.apache.lucene.analysis.tokenattributes import \
    OffsetAttribute, CharTermAttribute, TypeAttribute, \
    PositionIncrementAttribute
from org.apache.pylucene.util import PythonAttributeImpl


class BaseTokenStreamTestCase(TestCase):
    """
    some helpers to test Analyzers and TokenStreams
    """

    class CheckClearAttributesAttributeImpl(PythonAttributeImpl):

        def __init__(_self):
            super(PythonAttributeImpl, _self).__init__()
            _self.clearCalled = False

        def getAndResetClearCalled(_self):
            try:
                return _self.clearCalled
            finally:
                _self.clearCalled = False

        def clear(_self):
            _self.clearCalled = True

        def equals(_self, other):
            return (CheckClearAttributesAttributeImpl.instance_(other) and
                    CheckClearAttributesAttributeImpl.cast_(other).clearCalled == _self.clearCalled)

        def hashCode(_self):
            return 76137213 ^ Boolean.valueOf(_self.clearCalled).hashCode()

        def copyTo(_self, target):
            CheckClearAttributesAttributeImpl.cast_(target).clear()

    def _assertTokenStreamContents(self, ts, output,
                                   startOffsets=None, endOffsets=None,
                                   types=None, posIncrements=None,
                                   finalOffset=None):

        #checkClearAtt = ts.addAttribute(PythonAttribute.class_);

        self.assert_(output is not None)
        self.assert_(ts.hasAttribute(CharTermAttribute.class_),
                     "has no CharTermAttribute")
        termAtt = ts.getAttribute(CharTermAttribute.class_)

        offsetAtt = None
        if (startOffsets is not None or endOffsets is not None or
            finalOffset is not None):
            self.assert_(ts.hasAttribute(OffsetAttribute.class_),
                         "has no OffsetAttribute")
            offsetAtt = ts.getAttribute(OffsetAttribute.class_)

        typeAtt = None
        if types is not None:
            self.assert_(ts.hasAttribute(TypeAttribute.class_),
                         "has no TypeAttribute")
            typeAtt = ts.getAttribute(TypeAttribute.class_)

        posIncrAtt = None
        if posIncrements is not None:
            self.assert_(ts.hasAttribute(PositionIncrementAttribute.class_),
                         "has no PositionIncrementAttribute")
            posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class_)

        ts.reset()
        for i in xrange(len(output)):
            # extra safety to enforce, that the state is not preserved and
            # also assign bogus values
            ts.clearAttributes()
            termAtt.setEmpty().append("bogusTerm")
            if offsetAtt is not None:
                offsetAtt.setOffset(14584724, 24683243)
            if typeAtt is not None:
                typeAtt.setType("bogusType")
            if posIncrAtt is not None:
                posIncrAtt.setPositionIncrement(45987657)

            self.assert_(ts.incrementToken(), "token %d exists" % (i))
            self.assertEqual(output[i], termAtt.toString(), "term %d" % (i))
            if startOffsets is not None:
                self.assertEqual(startOffsets[i], offsetAtt.startOffset(),
                                 "startOffset %d" % (i))
            if endOffsets is not None:
                self.assertEqual(endOffsets[i], offsetAtt.endOffset(),
                                 "endOffset %d" % (i))
            if types is not None:
                self.assertEqual(types[i], typeAtt.type(), "type %d" % (i))
            if posIncrements is not None:
                self.assertEqual(posIncrements[i],
                                 posIncrAtt.getPositionIncrement(),
                                 "posIncrement %d" % (i))

        self.assert_(not ts.incrementToken(), "end of stream")
        ts.end()
        ts.close()

    def _assertAnalyzesTo(self, a, input, output,
                          startOffsets=None, endOffsets=None,
                          types=None, posIncrements=None):

        ts = a.tokenStream("dummy", StringReader(input))
        self._assertTokenStreamContents(ts, output, startOffsets, endOffsets,
                                        types, posIncrements)

    def _assertAnalyzesToReuse(self, a, input, output,
                               startOffsets=None, endOffsets=None,
                               types=None, posIncrements=None):

        ts = a.reusableTokenStream("dummy", StringReader(input))
        self._assertTokenStreamContents(ts, output, startOffsets, endOffsets,
                                        types, posIncrements)

    # simple utility method for testing stemmers
    def _checkOneTerm(self, a, input, expected):
        self._assertAnalyzesTo(a, input, JArray('string')(expected))

    def _checkOneTermReuse(self, a, input, expected):
        self._assertAnalyzesToReuse(a, input, JArray('string')(expected))
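A subclass typically drives these helpers the way test_Analyzers.py does further below; a hypothetical minimal case, assuming a built lucene module on the path:

import lucene
from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.util import Version

class ExampleTokenStreamTest(BaseTokenStreamTestCase):

    def testSimpleAnalyzer(self):
        # SimpleAnalyzer splits on non-letters and lower-cases, so the
        # digits and punctuation disappear from the token stream.
        a = SimpleAnalyzer(Version.LUCENE_CURRENT)
        self._assertAnalyzesTo(a, "PyLucene 4.10!", ["pylucene"],
                               startOffsets=[0], endOffsets=[8])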
pylucene-4.10.1-1/test/MultiSpansWrapper.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, lucene # so that 'org' is found

from java.util import Collections, HashMap, TreeSet
from org.apache.lucene.index import Term, TermContext, ReaderUtil
from org.apache.lucene.search import DocIdSetIterator
from org.apache.lucene.search.spans import SpanQuery
from org.apache.pylucene.search.spans import PythonSpans


class MultiSpansWrapper(PythonSpans):

    def __init__(self, leaves, query, termContexts):
        super(MultiSpansWrapper, self).__init__()

        self.leaves = leaves
        self.numLeaves = leaves.size()
        self.query = query
        self.termContexts = termContexts
        self.leafOrd = 0
        self.current = None

    @classmethod
    def wrap(cls, topLevelReaderContext, query):
        termContexts = HashMap()
        terms = TreeSet()
        query.extractTerms(terms)
        for term in terms:
            termContexts.put(term, TermContext.build(topLevelReaderContext, term))

        leaves = topLevelReaderContext.leaves()
        if leaves.size() == 1:
            ctx = leaves.get(0)
            return query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts)

        return MultiSpansWrapper(leaves, query, termContexts)

    def next(self):
        if self.leafOrd >= self.numLeaves:
            return False

        if self.current is None:
            ctx = self.leaves.get(self.leafOrd)
            self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(),
                                               self.termContexts)

        while True:
            if self.current.next():
                return True

            self.leafOrd += 1
            if self.leafOrd < self.numLeaves:
                ctx = self.leaves.get(self.leafOrd)
                self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(),
                                                   self.termContexts)
            else:
                self.current = None
                break

        return False

    def skipTo(self, target):
        if self.leafOrd >= self.numLeaves:
            return False

        subIndex = ReaderUtil.subIndex(target, self.leaves)
        assert subIndex >= self.leafOrd

        if subIndex != self.leafOrd:
            ctx = self.leaves.get(subIndex)
            self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(),
                                               self.termContexts)
            self.leafOrd = subIndex
        elif self.current is None:
            ctx = self.leaves.get(self.leafOrd)
            self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(),
                                               self.termContexts)

        while True:
            if self.current.skipTo(target - self.leaves.get(self.leafOrd).docBase):
                return True

            self.leafOrd += 1
            if self.leafOrd < self.numLeaves:
                ctx = self.leaves.get(self.leafOrd)
                self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(),
                                                   self.termContexts)
            else:
                self.current = None
                break

        return False

    def doc(self):
        if self.current is None:
            return DocIdSetIterator.NO_MORE_DOCS
        return self.current.doc() + self.leaves.get(self.leafOrd).docBase

    def start(self):
        if self.current is None:
            return DocIdSetIterator.NO_MORE_DOCS
        return self.current.start()

    def end(self):
        if self.current is None:
            return DocIdSetIterator.NO_MORE_DOCS
        return self.current.end()

    def getPayload(self):
        if self.current is None:
            return Collections.emptyList()
        return self.current.getPayload()

    def isPayloadAvailable(self):
        if self.current is None:
            return False
        return self.current.isPayloadAvailable()

    def cost(self):
        return sys.maxint
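A hypothetical use of the wrapper, iterating span positions for one term over all segments of an index; 'directory' stands for any Directory holding an index with a 'body' field:

from org.apache.lucene.index import DirectoryReader, Term
from org.apache.lucene.search.spans import SpanTermQuery

reader = DirectoryReader.open(directory)
spans = MultiSpansWrapper.wrap(reader.getContext(),
                               SpanTermQuery(Term("body", "body")))
while spans.next():
    # doc() is rebased by the current leaf's docBase, so ids are index-wide
    print spans.doc(), spans.start(), spans.end()
reader.close()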
pylucene-4.10.1-1/test/PyLuceneTestCase.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import lucene # so that 'org' is found

from unittest import TestCase

from java.io import File
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.document import Field
from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
from org.apache.lucene.index import \
    IndexWriter, IndexWriterConfig, DirectoryReader
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import Version


class PyLuceneTestCase(TestCase):

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)
        self.TEST_VERSION = Version.LUCENE_CURRENT

    def setUp(self):
        self.directory = RAMDirectory()

    def tearDown(self):
        self.directory.close()

    def getConfig(self, analyzer=None):
        return IndexWriterConfig(self.TEST_VERSION, analyzer)

    def getWriter(self, directory=None, analyzer=None, open_mode=None,
                  similarity=None, maxBufferedDocs=None, mergePolicy=None):
        if analyzer is None:
            analyzer = LimitTokenCountAnalyzer(
                WhitespaceAnalyzer(self.TEST_VERSION), 10000)
        config = self.getConfig(analyzer)

        if open_mode is None:
            open_mode = IndexWriterConfig.OpenMode.CREATE
        config.setOpenMode(open_mode)

        if similarity is not None:
            config.setSimilarity(similarity)
        if maxBufferedDocs is not None:
            config.setMaxBufferedDocs(maxBufferedDocs)
        if mergePolicy is not None:
            config.setMergePolicy(mergePolicy)

        if directory is None:
            directory = self.directory

        return IndexWriter(directory, config)

    def getSearcher(self, directory=None, reader=None):
        if reader is not None:
            return IndexSearcher(reader)
        return IndexSearcher(self.getReader(directory=directory))

    def getReader(self, directory=None):
        if directory is None:
            directory = self.directory
        return DirectoryReader.open(directory)
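The base class gives each test a fresh RAMDirectory plus getWriter()/getSearcher() helpers, which the test files below rely on. A hypothetical minimal round-trip test built on it:

from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import Term
from org.apache.lucene.search import TermQuery

class ExampleRoundTripTest(PyLuceneTestCase):

    def testRoundTrip(self):
        writer = self.getWriter()  # whitespace analyzer, CREATE mode
        doc = Document()
        doc.add(Field("body", "hello world", TextField.TYPE_STORED))
        writer.addDocument(doc)
        writer.close()

        searcher = self.getSearcher()
        topDocs = searcher.search(TermQuery(Term("body", "hello")), 10)
        self.assertEqual(1, topDocs.totalHits)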
pylucene-4.10.1-1/test/test_Analyzers.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, lucene, unittest
from BaseTokenStreamTestCase import BaseTokenStreamTestCase

from lucene import JArray
from java.io import StringReader
from org.apache.lucene.analysis.core import \
    SimpleAnalyzer, WhitespaceAnalyzer, StopAnalyzer, WhitespaceTokenizer
from org.apache.lucene.analysis.tokenattributes import PayloadAttribute
from org.apache.lucene.util import Version, BytesRef
from org.apache.pylucene.analysis import PythonTokenFilter


class AnalyzersTestCase(BaseTokenStreamTestCase):
    """
    Unit tests ported from Java Lucene
    """

    def testSimple(self):
        a = SimpleAnalyzer(Version.LUCENE_CURRENT)
        self._assertAnalyzesTo(a, "foo bar FOO BAR",
                               [ "foo", "bar", "foo", "bar" ])
        self._assertAnalyzesTo(a, "foo bar . FOO <> BAR",
                               [ "foo", "bar", "foo", "bar" ])
        self._assertAnalyzesTo(a, "foo.bar.FOO.BAR",
                               [ "foo", "bar", "foo", "bar" ])
        self._assertAnalyzesTo(a, "U.S.A.", [ "u", "s", "a" ])
        self._assertAnalyzesTo(a, "C++", [ "c" ])
        self._assertAnalyzesTo(a, "B2B", [ "b", "b" ])
        self._assertAnalyzesTo(a, "2B", [ "b" ])
        self._assertAnalyzesTo(a, "\"QUOTED\" word", [ "quoted", "word" ])

    def testNull(self):
        a = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
        self._assertAnalyzesTo(a, "foo bar FOO BAR",
                               [ "foo", "bar", "FOO", "BAR" ])
        self._assertAnalyzesTo(a, "foo bar . FOO <> BAR",
                               [ "foo", "bar", ".", "FOO", "<>", "BAR" ])
        self._assertAnalyzesTo(a, "foo.bar.FOO.BAR", [ "foo.bar.FOO.BAR" ])
        self._assertAnalyzesTo(a, "U.S.A.", [ "U.S.A." ])
        self._assertAnalyzesTo(a, "C++", [ "C++" ])
        self._assertAnalyzesTo(a, "B2B", [ "B2B" ])
        self._assertAnalyzesTo(a, "2B", [ "2B" ])
        self._assertAnalyzesTo(a, "\"QUOTED\" word", [ "\"QUOTED\"", "word" ])

    def testStop(self):
        a = StopAnalyzer(Version.LUCENE_CURRENT)
        self._assertAnalyzesTo(a, "foo bar FOO BAR",
                               [ "foo", "bar", "foo", "bar" ])
        self._assertAnalyzesTo(a, "foo a bar such FOO THESE BAR",
                               [ "foo", "bar", "foo", "bar" ])

    def _verifyPayload(self, ts):
        ts.reset()
        payloadAtt = ts.getAttribute(PayloadAttribute.class_)
        b = 0
        while True:
            b += 1
            if not ts.incrementToken():
                break
            self.assertEqual(b, payloadAtt.getPayload().bytes[0])

    # Make sure old style next() calls result in a new copy of payloads
    def testPayloadCopy(self):
        s = "how now brown cow"
        ts = WhitespaceTokenizer(Version.LUCENE_CURRENT, StringReader(s))
        ts = PayloadSetter(ts)
        self._verifyPayload(ts)

        ts = WhitespaceTokenizer(Version.LUCENE_CURRENT, StringReader(s))
        ts = PayloadSetter(ts)
        self._verifyPayload(ts)


class PayloadSetter(PythonTokenFilter):

    def __init__(self, input):
        super(PayloadSetter, self).__init__(input)
        self.input = input

        self.payloadAtt = self.addAttribute(PayloadAttribute.class_)
        self.data = JArray('byte')(1)
        self.p = BytesRef(self.data, 0, 1)

    def incrementToken(self):
        if not self.input.incrementToken():
            return False

        self.payloadAtt.setPayload(self.p)
        self.data[0] += 1
        return True


if __name__ == "__main__":
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except:
                pass
    else:
        unittest.main()

pylucene-4.10.1-1/test/test_Binary.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

# test PyLucene binary field

import sys, lucene, unittest
from lucene import JArray
from org.apache.lucene.document import StoredField


class BinaryTestCase(unittest.TestCase):

    def binary(self, b):
        c = JArray('byte')(b)
        field = StoredField("bin", c)
        v = field.binaryValue().bytes
        assert c == v and b == [a for a in v]

    def testBinary(self):
        self.binary([66, 90, 104, 57, 49, 65, 89, 38, 83, 89, 105, 56,
                     95, 75, 0, 0, 14, -41, -128])
        self.binary([])
        self.binary([0, 0, 0])


if __name__ == '__main__':
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except:
                pass
    else:
        unittest.main()
pylucene-4.10.1-1/test/test_BinaryDocument.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, lucene, unittest
from lucene import JArray
from PyLuceneTestCase import PyLuceneTestCase

from org.apache.lucene.document import \
    Document, StoredField, CompressionTools, Field, FieldType
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import IndexWriter
from org.apache.lucene.util import Version


class TestBinaryDocument(PyLuceneTestCase):

    binaryValStored = "this text will be stored as a byte array in the index"
    binaryValCompressed = "this text will be also stored and compressed as a byte array in the index"

    def testBinaryFieldInIndex(self):
        ft = FieldType()
        ft.setStored(True)

        bytes = JArray('byte')(self.binaryValStored)
        binaryFldStored = StoredField("binaryStored", bytes)
        stringFldStored = Field("stringStored", self.binaryValStored, ft)

        doc = Document()
        doc.add(binaryFldStored)
        doc.add(stringFldStored)

        # test for field count
        self.assertEqual(2, doc.fields.size())

        # add the doc to a ram index
        writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
        writer.addDocument(doc)
        writer.close()

        # open a reader and fetch the document
        reader = self.getReader()
        docFromReader = reader.document(0)
        self.assert_(docFromReader is not None)

        # fetch the binary stored field and compare its content with the
        # original one
        bytes = docFromReader.getBinaryValue("binaryStored")
        binaryFldStoredTest = bytes.bytes.string_
        self.assertEqual(binaryFldStoredTest, self.binaryValStored)

        # fetch the string field and compare its content with the original
        # one
        stringFldStoredTest = docFromReader.get("stringStored")
        self.assertEqual(stringFldStoredTest, self.binaryValStored)

        reader.close()

    def testCompressionTools(self):
        bytes = JArray('byte')(self.binaryValCompressed)
        binaryFldCompressed = StoredField("binaryCompressed",
                                          CompressionTools.compress(bytes))
        stringFldCompressed = StoredField("stringCompressed",
                                          CompressionTools.compressString(self.binaryValCompressed))

        doc = Document()
        doc.add(binaryFldCompressed)
        doc.add(stringFldCompressed)

        # add the doc to a ram index
        writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
        writer.addDocument(doc)
        writer.close()

        # open a reader and fetch the document
        reader = self.getReader()
        docFromReader = reader.document(0)
        self.assert_(docFromReader is not None)

        # fetch the binary compressed field and compare its content with
        # the original one
        bytes = CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed"))
        binaryFldCompressedTest = bytes.string_
        self.assertEqual(binaryFldCompressedTest, self.binaryValCompressed)

        self.assertEqual(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")),
                         self.binaryValCompressed)

        reader.close()


if __name__ == '__main__':
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except:
                pass
    else:
        unittest.main()
==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.index import Term from org.apache.lucene.search import BooleanClause, BooleanQuery, TermQuery from org.apache.lucene.util import Version class BooleanOrTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def __init__(self, *args): super(BooleanOrTestCase, self).__init__(*args) self.FIELD_T = "T" self.FIELD_C = "C" self.t1 = TermQuery(Term(self.FIELD_T, "files")) self.t2 = TermQuery(Term(self.FIELD_T, "deleting")) self.c1 = TermQuery(Term(self.FIELD_C, "production")) self.c2 = TermQuery(Term(self.FIELD_C, "optimize")) self.searcher = None def setUp(self): super(BooleanOrTestCase, self).setUp() # add the doc to a ram index writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)) d = Document() d.add(Field(self.FIELD_T, "Optimize not deleting all files", TextField.TYPE_STORED)) d.add(Field(self.FIELD_C, "Deleted When I run an optimize in our production environment.", TextField.TYPE_STORED)) writer.addDocument(d) writer.close() self.searcher = self.getSearcher() def search(self, q): return self.searcher.search(q, 50).totalHits def testElements(self): self.assertEqual(1, self.search(self.t1)) self.assertEqual(1, self.search(self.t2)) self.assertEqual(1, self.search(self.c1)) self.assertEqual(1, self.search(self.c2)) def testFlat(self): q = BooleanQuery() q.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD)) q.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD)) q.add(BooleanClause(self.c1, BooleanClause.Occur.SHOULD)) q.add(BooleanClause(self.c2, BooleanClause.Occur.SHOULD)) self.assertEqual(1, self.search(q)) def testParenthesisMust(self): q3 = BooleanQuery() q3.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD)) q3.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD)) q4 = BooleanQuery() q4.add(BooleanClause(self.c1, BooleanClause.Occur.MUST)) q4.add(BooleanClause(self.c2, BooleanClause.Occur.MUST)) q2 = BooleanQuery() q2.add(q3, BooleanClause.Occur.SHOULD) q2.add(q4, BooleanClause.Occur.SHOULD) self.assertEqual(1, self.search(q2)) def testParenthesisMust2(self): q3 = BooleanQuery() q3.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD)) q3.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD)) q4 = BooleanQuery() q4.add(BooleanClause(self.c1, BooleanClause.Occur.SHOULD)) q4.add(BooleanClause(self.c2, BooleanClause.Occur.SHOULD)) q2 = BooleanQuery() q2.add(q3, BooleanClause.Occur.SHOULD) q2.add(q4, BooleanClause.Occur.MUST) self.assertEqual(1, self.search(q2)) def testParenthesisShould(self): q3 = BooleanQuery() q3.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD)) q3.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD)) q4 = BooleanQuery() 
q4.add(BooleanClause(self.c1, BooleanClause.Occur.SHOULD)) q4.add(BooleanClause(self.c2, BooleanClause.Occur.SHOULD)) q2 = BooleanQuery() q2.add(q3, BooleanClause.Occur.SHOULD) q2.add(q4, BooleanClause.Occur.SHOULD) self.assertEqual(1, self.search(q2)) if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_BooleanQuery.py000644 000765 000000 00000004345 12162654000 021001 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.index import Term from org.apache.lucene.search import BooleanClause, BooleanQuery, TermQuery class TestBooleanQuery(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def testEquality(self): bq1 = BooleanQuery() bq1.add(TermQuery(Term("field", "value1")), BooleanClause.Occur.SHOULD) bq1.add(TermQuery(Term("field", "value2")), BooleanClause.Occur.SHOULD) nested1 = BooleanQuery() nested1.add(TermQuery(Term("field", "nestedvalue1")), BooleanClause.Occur.SHOULD) nested1.add(TermQuery(Term("field", "nestedvalue2")), BooleanClause.Occur.SHOULD) bq1.add(nested1, BooleanClause.Occur.SHOULD) bq2 = BooleanQuery() bq2.add(TermQuery(Term("field", "value1")), BooleanClause.Occur.SHOULD) bq2.add(TermQuery(Term("field", "value2")), BooleanClause.Occur.SHOULD) nested2 = BooleanQuery() nested2.add(TermQuery(Term("field", "nestedvalue1")), BooleanClause.Occur.SHOULD) nested2.add(TermQuery(Term("field", "nestedvalue2")), BooleanClause.Occur.SHOULD) bq2.add(nested2, BooleanClause.Occur.SHOULD) self.assert_(bq1.equals(bq2)) if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_bug1564.py000644 000765 000000 00000004041 12162654000 017462 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
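# A hedged sketch of the two query shapes the tests above compare: a
# flat disjunction versus the same clauses nested one level deep.
# Field and term values are made up; assumes an initialized VM.
from org.apache.lucene.index import Term
from org.apache.lucene.search import BooleanClause, BooleanQuery, TermQuery

flat = BooleanQuery()
flat.add(TermQuery(Term("f", "a")), BooleanClause.Occur.SHOULD)
flat.add(TermQuery(Term("f", "b")), BooleanClause.Occur.SHOULD)

nested = BooleanQuery()
inner = BooleanQuery()
inner.add(TermQuery(Term("f", "a")), BooleanClause.Occur.SHOULD)
inner.add(TermQuery(Term("f", "b")), BooleanClause.Occur.SHOULD)
nested.add(inner, BooleanClause.Occur.MUST)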
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.document import Document, Field, StoredField, TextField from org.apache.lucene.queryparser.classic import QueryParser from org.apache.lucene.util import Version class Test_Bug1564(PyLuceneTestCase): def setUp(self): super(Test_Bug1564, self).setUp() self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) writer = self.getWriter(analyzer=self.analyzer) doc = Document() doc.add(Field('all', u'windowpane beplaster rapacious \ catatonia gauntlet wynn depressible swede pick dressmake supreme \ jeremy plumb theoretic bureaucracy causation chartres equipoise \ dispersible careen heard', TextField.TYPE_NOT_STORED)) doc.add(Field('id', '1', StoredField.TYPE)) writer.addDocument(doc) writer.commit() writer.close() def test_bug1564(self): searcher = self.getSearcher() query = QueryParser(Version.LUCENE_CURRENT, 'all', self.analyzer).parse('supreme') topDocs = searcher.search(query, 50) self.assertEqual(topDocs.totalHits, 1) if __name__ == '__main__': lucene.initVM(vmargs=['-Djava.awt.headless=true']) unittest.main() pylucene-4.10.1-1/test/test_bug1763.py000644 000765 000000 00000004730 12162654000 017470 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
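# The index-then-parse-then-search cycle that test_bug1564 drives
# through PyLuceneTestCase, condensed into a hedged standalone sketch
# using plain Lucene classes; directory, field and text are made up.
import lucene
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import \
    DirectoryReader, IndexWriter, IndexWriterConfig
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import Version

lucene.initVM(vmargs=['-Djava.awt.headless=true'])
analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
directory = RAMDirectory()
writer = IndexWriter(directory,
                     IndexWriterConfig(Version.LUCENE_CURRENT, analyzer))
doc = Document()
doc.add(Field('all', u'the supreme court', TextField.TYPE_NOT_STORED))
writer.addDocument(doc)
writer.close()

searcher = IndexSearcher(DirectoryReader.open(directory))
query = QueryParser(Version.LUCENE_CURRENT, 'all', analyzer).parse('supreme')
assert searcher.search(query, 50).totalHits == 1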
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.document import Document, Field, StoredField, TextField from org.apache.lucene.queryparser.classic import QueryParser from org.apache.lucene.store import RAMDirectory from org.apache.lucene.util import Version class Test_Bug1763(PyLuceneTestCase): def setUp(self): super(Test_Bug1763, self).setUp() self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) self.d1 = RAMDirectory() self.d2 = RAMDirectory() w1, w2 = [self.getWriter(directory=d, analyzer=self.analyzer) for d in [self.d1, self.d2]] doc1 = Document() doc2 = Document() doc1.add(Field("all", "blah blah double blah Gesundheit", TextField.TYPE_NOT_STORED)) doc1.add(Field('id', '1', StoredField.TYPE)) doc2.add(Field("all", "a quick brown test ran over the lazy data", TextField.TYPE_NOT_STORED)) doc2.add(Field('id', '2', StoredField.TYPE)) w1.addDocument(doc1) w2.addDocument(doc2) for w in [w1, w2]: w.close() def test_bug1763(self): w1 = self.getWriter(directory=self.d1, analyzer=self.analyzer) w1.addIndexes([self.getReader(directory=self.d2)]) w1.close() searcher = self.getSearcher(self.d1) q = QueryParser(Version.LUCENE_CURRENT, 'all', self.analyzer).parse('brown') topDocs = searcher.search(q, 50) self.assertEqual(searcher.doc(topDocs.scoreDocs[0].doc).get('id'), '2') if __name__ == '__main__': lucene.initVM(vmargs=['-Djava.awt.headless=true']) unittest.main() pylucene-4.10.1-1/test/test_bug1842.py000644 000765 000000 00000004453 12162654000 017470 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
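# A hedged, self-contained version of the cross-directory merge in
# test_bug1763: build two RAMDirectory indexes, then fold the second
# into the first with IndexWriter.addIndexes. Texts are made up and
# the VM is assumed initialized.
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import \
    DirectoryReader, IndexWriter, IndexWriterConfig
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import Version

def make_index(text):
    d = RAMDirectory()
    config = IndexWriterConfig(Version.LUCENE_CURRENT,
                               StandardAnalyzer(Version.LUCENE_CURRENT))
    w = IndexWriter(d, config)
    doc = Document()
    doc.add(Field("all", text, TextField.TYPE_NOT_STORED))
    w.addDocument(doc)
    w.close()
    return d

d1, d2 = make_index("blah blah"), make_index("quick brown test")
w1 = IndexWriter(d1, IndexWriterConfig(Version.LUCENE_CURRENT,
                                       StandardAnalyzer(Version.LUCENE_CURRENT)))
w1.addIndexes([DirectoryReader.open(d2)])
w1.close()
assert DirectoryReader.open(d1).numDocs() == 2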
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.document import Document, Field, FieldType, StringField from org.apache.lucene.index import Term from org.apache.lucene.search import TermQuery from org.apache.lucene.util import BytesRefIterator, Version class Test_Bug1842(PyLuceneTestCase): def setUp(self): super(Test_Bug1842, self).setUp() self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) w1 = self.getWriter(analyzer=self.analyzer) doc1 = Document() ftype = FieldType() ftype.setStored(False) ftype.setIndexed(True) ftype.setStoreTermVectors(True) doc1.add(Field("all", "blah blah blah Gesundheit", ftype)) doc1.add(Field('id', '1', StringField.TYPE_NOT_STORED)) w1.addDocument(doc1) w1.close() def test_bug1842(self): reader = self.getReader() searcher = self.getSearcher() q = TermQuery(Term("id", '1')) topDocs = searcher.search(q, 50) termvec = reader.getTermVector(topDocs.scoreDocs[0].doc, "all") terms = [] freqs = [] termsEnum = termvec.iterator(None) for term in BytesRefIterator.cast_(termsEnum): terms.append(term.utf8ToString()) freqs.append(termsEnum.totalTermFreq()) terms.sort() self.assert_(terms == ['blah', 'gesundheit']) self.assert_(freqs == [3, 1]) if __name__ == '__main__': lucene.initVM(vmargs=['-Djava.awt.headless=true']) unittest.main() pylucene-4.10.1-1/test/test_CachingWrapperFilter.py000644 000765 000000 00000005252 12162654000 022435 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
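# A hedged helper distilling the BytesRefIterator.cast_ idiom used in
# test_bug1842: walk a document's term vector and collect
# (term, totalTermFreq) pairs. Assumes an open IndexReader whose field
# was indexed with term vectors, as in setUp above.
from org.apache.lucene.util import BytesRefIterator

def term_vector_counts(reader, doc_id, field):
    termsEnum = reader.getTermVector(doc_id, field).iterator(None)
    counts = []
    for term in BytesRefIterator.cast_(termsEnum):
        counts.append((term.utf8ToString(), termsEnum.totalTermFreq()))
    return counts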
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.index import \ AtomicReaderContext, SlowCompositeReaderWrapper from org.apache.lucene.search import CachingWrapperFilter from org.apache.lucene.util import Version, FixedBitSet from org.apache.pylucene.search import PythonFilter class CachingWrapperFilterTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def testCachingWorks(self): writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)) writer.close() reader = SlowCompositeReaderWrapper.wrap(self.getReader()) context = AtomicReaderContext.cast_(reader.getContext()) class mockFilter(PythonFilter): def __init__(self): super(mockFilter, self).__init__() self._wasCalled = False def getDocIdSet(self, context, acceptDocs): self._wasCalled = True; return FixedBitSet(context.reader().maxDoc()) def clear(self): self._wasCalled = False def wasCalled(self): return self._wasCalled filter = mockFilter() cacher = CachingWrapperFilter(filter) # first time, nested filter is called strongRef = cacher.getDocIdSet(context, context.reader().getLiveDocs()) self.assert_(filter.wasCalled(), "first time") # second time, nested filter should not be called filter.clear() cacher.getDocIdSet(context, context.reader().getLiveDocs()) self.assert_(not filter.wasCalled(), "second time") reader.close() if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_Collections.py000644 000765 000000 00000021723 12162654000 020651 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
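# mockFilter above shows the PythonFilter extension point; as another
# hedged sketch of the same pattern, a filter that accepts every
# document of the segment. The class name is illustrative only and the
# VM is assumed initialized.
from org.apache.lucene.util import FixedBitSet
from org.apache.pylucene.search import PythonFilter

class MatchAllFilter(PythonFilter):
    def getDocIdSet(self, context, acceptDocs):
        bits = FixedBitSet(context.reader().maxDoc())
        if bits.length() > 0:
            bits.set(0, bits.length())  # turn on every bit in [0, maxDoc)
        return bits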
# ==================================================================== import sys, lucene, unittest from lucene.collections import JavaSet, JavaList from java.lang import Class, Boolean, Integer, Long, Double, String from java.util import ArrayList, HashSet class Test_CollectionsSetBase(unittest.TestCase): """base test case for JavaSet (uses integers) subclass may redefine method 'createTestSet' """ def createTestSet(self): """creates the test set for this test case """ return set(range(9)) def setUp(self): self.testSet = self.createTestSet() self.javaSet = JavaSet(self.testSet) # print "created testSet: %s JavaSet %s" % (self.testSet,self.javaSet) def tearDown(self): del self.testSet del self.javaSet def test_Contains(self): elem0 = list(self.testSet)[0] self.assertTrue(self.javaSet.contains(elem0)) def test_Size(self): self.assertEqual(len(self.testSet), self.javaSet.size()) def test_Add(self): """must fail to add an existing element """ elem0 = list(self.testSet)[0] self.assertFalse(self.javaSet.add(elem0)) self.assertEqual(len(self.testSet), self.javaSet.size(), "size has not changed") def test_HashSet(self): """create HashSet in JVM (from the JavaSet) """ hashSet = HashSet(self.javaSet) # print "created HashSet:", hashSet, type(hashSet) self.assertEqual(self.javaSet.size(), hashSet.size(), "HashSet has same size") elem0 = list(self.testSet)[0] self.assertTrue(hashSet.contains(elem0)) def test_JArray(self): """create JArray in JVM (from the JavaSet) """ jArray = self.javaSet.toArray() # print "created JArray:", jArray, type(jArray) self.assertEqual(self.javaSet.size(),len(jArray), "JArray has same size") elem0 = jArray[0] elem1 = jArray[1] # print "JArray: first element: %s (%s)" % (elem0,type(elem0)) # print "JArray: second element: %s (%s)"% (elem1,type(elem1)) def test_ArrayList(self): """create ArrayList in JVM (from the JavaSet) """ arrayList = ArrayList(self.javaSet) # print "created ArrayList:", arrayList, type(arrayList) self.assertEqual(self.javaSet.size(), arrayList.size(), "ArrayList has same size") elem0 = arrayList.get(0) elem1 = arrayList.get(1) # print "ArrayList: first element: %s (%s) indexOf=%d" % (elem0,type(elem0), arrayList.indexOf(elem0)) # print "ArrayList: second element: %s (%s) indexOf=%d" % (elem1,type(elem1), arrayList.indexOf(elem1)) self.assertFalse(elem0.equals(elem1), "ArrayList: first element must NOT equal second element") self.assertNotEqual(elem0, elem1, "ArrayList: first element must NOT equal second element") class Test_CollectionsStringSet(Test_CollectionsSetBase): def createTestSet(self): return set(['a','b','c']) class Test_CollectionsFloatSet(Test_CollectionsSetBase): def createTestSet(self): return set([1.5, 4.5, -0.5]) class Test_CollectionsBoolList(Test_CollectionsSetBase): def createTestSet(self): return set([True,False]) class Test_CollectionsListBase(unittest.TestCase): """base test case for JavaList (uses integers) subclass may redefine method 'createTestList' """ def __init__(self, *args, **kwds): unittest.TestCase.__init__(self, *args, **kwds) self._primitive_types = { Class.forName('java.lang.Boolean'): Boolean, Class.forName('java.lang.Integer'): Integer, Class.forName('java.lang.Long'): Long, Class.forName('java.lang.Double'): Double, Class.forName('java.lang.String'): String } def createTestList(self): """creates the test list for this test case """ return range(9) def setUp(self): self.testList = self.createTestList() self.javaList = JavaList(self.testList) # print "created testList: %s JavaList %s" % (self.testList,self.javaList) 
def tearDown(self): del self.testList del self.javaList def test_Contains(self): elem0 = self.testList[0] self.assertTrue(self.javaList.contains(elem0)) def test_Size(self): self.assertEqual(len(self.testList), self.javaList.size()) def test_Pos(self): """elements must have same position """ elem0 = self.testList[0] elem1 = self.testList[1] pos0 = self.javaList.indexOf(elem0) pos1 = self.javaList.indexOf(elem1) self.assertEqual(pos0, 0, "indexOf first element") self.assertEqual(pos1, 1, "indexOf second element") def test_HashSet(self): """create HashSet in JVM (from the JavaSet) """ hashSet = HashSet(self.javaList) # print "created HashSet:", hashSet, type(hashSet) self.assertEqual(self.javaList.size(), hashSet.size(), "HashSet has same size") elem0 = self.testList[0] self.assertTrue(hashSet.contains(elem0)) def test_JArray(self): """create JArray in JVM (from the JavaSet) """ jArray = self.javaList.toArray() # print "created JArray:", jArray, type(jArray) self.assertEqual(self.javaList.size(),len(jArray), "JArray has same size") elem0 = jArray[0] elem1 = jArray[1] listElem0 = self.testList[0] listElem1 = self.testList[1] self.assertEqual(elem0, listElem0, "should be equal: %s (%s) <-> %s (%s)" % ( elem0,type(elem0), listElem0, type(listElem0))) self.assertEqual(elem1, listElem1, "should be equal: %s (%s) <-> %s (%s)" % ( elem1,type(elem1), listElem1, type(listElem1))) self.assertEqual(type(elem0), type(listElem0), "should have same type: %s <-> %s" % ( type(elem0), type(listElem0))) self.assertNotEqual(elem0, elem1, "JArray: first element must NOT equal second element") def test_ArrayList(self): """create ArrayList in JVM (from the JavaSet) """ arrayList = ArrayList(self.javaList) # print "created ArrayList:", arrayList, type(arrayList) self.assertEqual(self.javaList.size(), arrayList.size(), "ArrayList has same size") elem0 = arrayList.get(0) elem1 = arrayList.get(1) self.assertEqual(0, arrayList.indexOf(elem0), "same index position") self.assertEqual(1, arrayList.indexOf(elem1), "same index position") listElem0 = self.testList[0] listElem1 = self.testList[1] _type = self._primitive_types.get(elem0.getClass()) if _type is not None: elem0 = _type.class_.cast(elem0) elem1 = _type.class_.cast(elem1) self.assertEqual(elem0, listElem0, "should be equal: %s (%s) <-> %s (%s)" % ( elem0, type(elem0), listElem0, type(listElem0))) self.assertEqual(elem1, listElem1, "should be equal: %s (%s) <-> %s (%s)" % ( elem1, type(elem1), listElem1, type(listElem1))) self.assertEqual(type(elem0), type(listElem0), "should have same type: %s <-> %s" % ( type(elem0), type(listElem0))) self.assertNotEqual(elem0, elem1, "ArrayList: first element must NOT equal second element") class Test_CollectionsStringList(Test_CollectionsListBase): def createTestList(self): return [u'a', u'b', u'c'] class Test_CollectionsFloatList(Test_CollectionsListBase): def createTestList(self): return [1.5, 4.5, -0.5] class Test_CollectionsBoolList(Test_CollectionsListBase): def createTestList(self): return [True,False] if __name__ == '__main__': lucene.initVM(vmargs=['-Djava.awt.headless=true']) unittest.main() pylucene-4.10.1-1/test/test_DocBoost.py000644 000765 000000 00000005317 12162654000 020110 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
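# A hedged usage sketch for the lucene.collections wrappers exercised
# above: JavaSet and JavaList make native Python containers usable
# wherever Java code expects a java.util.Set or java.util.List.
import lucene
from lucene.collections import JavaSet, JavaList
from java.util import ArrayList, HashSet

lucene.initVM(vmargs=['-Djava.awt.headless=true'])
js = JavaSet(set([1, 2, 3]))
assert HashSet(js).size() == 3
jl = JavaList([u'a', u'b', u'c'])
assert ArrayList(jl).size() == 3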
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.core import SimpleAnalyzer from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.index import Term from org.apache.lucene.search import TermQuery from org.apache.pylucene.search import PythonCollector from org.apache.lucene.util import Version class DocBoostTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def testDocBoost(self): writer = self.getWriter(analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT)) f1 = Field("field", "word", TextField.TYPE_STORED) f2 = Field("field", "word", TextField.TYPE_STORED) f2.setBoost(2.0) d1 = Document() d2 = Document() d1.add(f1) # boost = 1 d2.add(f2) # boost = 2 writer.addDocument(d1) writer.addDocument(d2) writer.close() scores = [0.0] * 2 class collector(PythonCollector): def __init__(_self, scores): super(collector, _self).__init__() _self.scores = scores _self.base = 0 def collect(_self, doc, score): _self.scores[doc + _self.base] = score def setNextReader(_self, context): _self.base = context.docBase def acceptsDocsOutOfOrder(_self): return True self.getSearcher().search(TermQuery(Term("field", "word")), collector(scores)) lastScore = 0.0 for score in scores: self.assert_(score > lastScore) lastScore = score if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_FilteredQuery.py000644 000765 000000 00000011327 12162654000 021156 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
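# The collector class in testDocBoost above is the standard
# PythonCollector recipe; this hedged sketch strips it down to one
# that merely gathers global doc ids. Names are illustrative and the
# VM is assumed initialized.
from org.apache.pylucene.search import PythonCollector

class DocIdCollector(PythonCollector):
    def __init__(self):
        super(DocIdCollector, self).__init__()
        self.docs = []
        self.base = 0

    def collect(self, doc, score):
        self.docs.append(doc + self.base)  # doc is segment-relative

    def setNextReader(self, context):
        self.base = context.docBase        # rebase per segment

    def acceptsDocsOutOfOrder(self):
        return True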
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from java.util import BitSet from org.apache.lucene.analysis.core import WhitespaceAnalyzer from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.index import Term from org.apache.lucene.search import \ FilteredQuery, Sort, SortField, TermRangeQuery, TermQuery from org.apache.lucene.util import Bits, DocIdBitSet, Version from org.apache.pylucene.search import PythonFilter class FilteredQueryTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def setUp(self): super(FilteredQueryTestCase, self).setUp() writer = self.getWriter(analyzer=WhitespaceAnalyzer(Version.LUCENE_CURRENT)) doc = Document() doc.add(Field("field", "one two three four five", TextField.TYPE_STORED)) doc.add(Field("sorter", "b", TextField.TYPE_STORED)) writer.addDocument(doc) doc = Document() doc.add(Field("field", "one two three four", TextField.TYPE_STORED)) doc.add(Field("sorter", "d", TextField.TYPE_STORED)) writer.addDocument(doc) doc = Document() doc.add(Field("field", "one two three y", TextField.TYPE_STORED)) doc.add(Field("sorter", "a", TextField.TYPE_STORED)) writer.addDocument(doc) doc = Document() doc.add(Field("field", "one two x", TextField.TYPE_STORED)) doc.add(Field("sorter", "c", TextField.TYPE_STORED)) writer.addDocument(doc) writer.commit() writer.close() self.searcher = self.getSearcher() self.query = TermQuery(Term("field", "three")) class filter(PythonFilter): def getDocIdSet(self, context, acceptDocs): if acceptDocs is None: acceptDocs = Bits.MatchAllBits(5) bitset = BitSet(5) if acceptDocs.get(1): bitset.set(1) if acceptDocs.get(3): bitset.set(3) return DocIdBitSet(bitset) self.filter = filter() def testFilteredQuery(self): filteredquery = FilteredQuery(self.query, self.filter) topDocs = self.searcher.search(filteredquery, 50) self.assertEqual(1, topDocs.totalHits) self.assertEqual(1, topDocs.scoreDocs[0].doc) topDocs = self.searcher.search(filteredquery, None, 50, Sort(SortField("sorter", SortField.Type.STRING))) self.assertEqual(1, topDocs.totalHits) self.assertEqual(1, topDocs.scoreDocs[0].doc) filteredquery = FilteredQuery(TermQuery(Term("field", "one")), self.filter) topDocs = self.searcher.search(filteredquery, 50) self.assertEqual(2, topDocs.totalHits) filteredquery = FilteredQuery(TermQuery(Term("field", "x")), self.filter) topDocs = self.searcher.search(filteredquery, 50) self.assertEqual(1, topDocs.totalHits) self.assertEqual(3, topDocs.scoreDocs[0].doc) filteredquery = FilteredQuery(TermQuery(Term("field", "y")), self.filter) topDocs = self.searcher.search(filteredquery, 50) self.assertEqual(0, topDocs.totalHits) def testRangeQuery(self): """ This tests FilteredQuery's rewrite correctness """ rq = TermRangeQuery.newStringRange("sorter", "b", "d", True, True) filteredquery = FilteredQuery(rq, self.filter) scoreDocs = self.searcher.search(filteredquery, None, 1000).scoreDocs self.assertEqual(2, len(scoreDocs)) if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_FuzzyQuery.py000644 000765 000000 00000034045 12162654000 020551 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in 
compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest from itertools import izip from lucene import JavaError from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.index import MultiReader, Term from org.apache.lucene.search import FuzzyQuery, MultiTermQuery from org.apache.lucene.store import RAMDirectory from org.apache.lucene.util import Version class FuzzyQueryTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def _addDoc(self, text, writer): doc = Document() doc.add(Field("field", text, TextField.TYPE_STORED)) writer.addDocument(doc) def testDefaultFuzziness(self): writer = self.getWriter() self._addDoc("aaaaa", writer) self._addDoc("aaaab", writer) self._addDoc("aaabb", writer) self._addDoc("aabbb", writer) self._addDoc("abbbb", writer) self._addDoc("bbbbb", writer) self._addDoc("ddddd", writer) writer.commit() writer.close() searcher = self.getSearcher() query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 0) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(3, len(hits)) # same with prefix query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 1) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(3, len(hits)) query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 2) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(3, len(hits)) query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 3) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(3, len(hits)) query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 4) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(2, len(hits)) query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 5) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(1, len(hits)) query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 6) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(1, len(hits)) # test scoring query = FuzzyQuery(Term("field", "bbbbb"), FuzzyQuery.defaultMaxEdits, 0) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(3, len(hits), "3 documents should match") order = ("bbbbb", "abbbb", "aabbb") for hit, o in izip(hits, order): term = searcher.doc(hit.doc).get("field") self.assertEqual(o, term) # test pq size by supplying maxExpansions=2 # This query would normally return 3 documents, because 3 terms match # (see above): query = FuzzyQuery(Term("field", "bbbbb"), FuzzyQuery.defaultMaxEdits, 0, 2, False) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(2, len(hits), "only 2 documents should match"); order = ("bbbbb","abbbb") for hit, o in izip(hits, order): term = searcher.doc(hit.doc).get("field") self.assertEqual(o, term) # not similar enough: query = FuzzyQuery(Term("field", "xxxxx")) topDocs = searcher.search(query, 50) 
self.assertEqual(0, topDocs.totalHits) # edit distance to "aaaaa" = 3 query = FuzzyQuery(Term("field", "aaccc")) topDocs = searcher.search(query, 50) self.assertEqual(0, topDocs.totalHits) # query identical to a word in the index: query = FuzzyQuery(Term("field", "aaaaa")) scoreDocs = searcher.search(query, 50).scoreDocs self.assertEqual(3, len(scoreDocs)) self.assertEqual(searcher.doc(scoreDocs[0].doc).get("field"), "aaaaa") # default allows for up to two edits: self.assertEqual(searcher.doc(scoreDocs[1].doc).get("field"), "aaaab") self.assertEqual(searcher.doc(scoreDocs[2].doc).get("field"), "aaabb") # query similar to a word in the index: query = FuzzyQuery(Term("field", "aaaac")) scoreDocs = searcher.search(query, 50).scoreDocs self.assertEqual(3, len(scoreDocs)) self.assertEqual(searcher.doc(scoreDocs[0].doc).get("field"), "aaaaa") self.assertEqual(searcher.doc(scoreDocs[1].doc).get("field"), "aaaab") self.assertEqual(searcher.doc(scoreDocs[2].doc).get("field"), "aaabb") # now with prefix query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 1) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(3, len(hits)) self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("aaaaa")) self.assertEqual(searcher.doc(hits[1].doc).get("field"), ("aaaab")) self.assertEqual(searcher.doc(hits[2].doc).get("field"), ("aaabb")) query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 2) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(3, len(hits)) self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("aaaaa")) self.assertEqual(searcher.doc(hits[1].doc).get("field"), ("aaaab")) self.assertEqual(searcher.doc(hits[2].doc).get("field"), ("aaabb")) query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 3) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(3, len(hits)) self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("aaaaa")) self.assertEqual(searcher.doc(hits[1].doc).get("field"), ("aaaab")) self.assertEqual(searcher.doc(hits[2].doc).get("field"), ("aaabb")) query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 4) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(2, len(hits)) self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("aaaaa")) self.assertEqual(searcher.doc(hits[1].doc).get("field"), ("aaaab")) query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 5) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(0, len(hits)) query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 0) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(1, len(hits)) self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd")) # now with prefix query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 1) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(1, len(hits)) self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd")) query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 2) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(1, len(hits)) self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd")) query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 3) hits = searcher.search(query, None, 1000).scoreDocs; self.assertEqual(1, len(hits)) self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd")) query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 4) hits = 
searcher.search(query, None, 1000).scoreDocs self.assertEqual(1, len(hits)) self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd")) query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 5) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(0, len(hits)) # different field = no match: query = FuzzyQuery(Term("anotherfield", "ddddX"), FuzzyQuery.defaultMaxEdits, 0) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(0, len(hits)) def test2(self): writer = self.getWriter() self._addDoc("LANGE", writer) self._addDoc("LUETH", writer) self._addDoc("PIRSING", writer) self._addDoc("RIEGEL", writer) self._addDoc("TRZECZIAK", writer) self._addDoc("WALKER", writer) self._addDoc("WBR", writer) self._addDoc("WE", writer) self._addDoc("WEB", writer) self._addDoc("WEBE", writer) self._addDoc("WEBER", writer) self._addDoc("WEBERE", writer) self._addDoc("WEBREE", writer) self._addDoc("WEBEREI", writer) self._addDoc("WBRE", writer) self._addDoc("WITTKOPF", writer) self._addDoc("WOJNAROWSKI", writer) self._addDoc("WRICKE", writer) reader = writer.getReader() searcher = self.getSearcher(reader=reader) writer.close() query = FuzzyQuery(Term("field", "WEBER"), 2, 1) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(8, len(hits)) def testTieBreaker(self): # MultiTermQuery provides (via attribute) information about which values # must be competitive to enter the priority queue. # # FuzzyQuery optimizes itself around this information, if the attribute # is not implemented correctly, there will be problems! # directory = RAMDirectory() writer = self.getWriter(directory=directory) self._addDoc("a123456", writer) self._addDoc("c123456", writer) self._addDoc("d123456", writer) self._addDoc("e123456", writer) directory2 = RAMDirectory() writer2 = self.getWriter(directory=directory2) self._addDoc("a123456", writer2) self._addDoc("b123456", writer2) self._addDoc("b123456", writer2) self._addDoc("b123456", writer2) self._addDoc("c123456", writer2) self._addDoc("f123456", writer2) ir1 = writer.getReader() ir2 = writer2.getReader() mr = MultiReader([ir1, ir2]) searcher = self.getSearcher(reader=mr) fq = FuzzyQuery(Term("field", "z123456"), 1, 0, 2, False) docs = searcher.search(fq, 2) self.assertEqual(5, docs.totalHits) # 5 docs, from the a and b's mr.close() ir1.close() ir2.close() writer.close() writer2.close() directory.close() directory2.close() def testBoostOnlyRewrite(self): # Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method. writer = self.getWriter() self._addDoc("Lucene", writer) self._addDoc("Lucene", writer) self._addDoc("Lucenne", writer) reader = writer.getReader() searcher = self.getSearcher(reader=reader) writer.close() query = FuzzyQuery(Term("field", "lucene")) query.setRewriteMethod(MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50)) hits = searcher.search(query, None, 1000).scoreDocs self.assertEqual(3, len(hits)) # normally, 'Lucenne' would be the first result as IDF will skew the score. 
self.assertEqual("Lucene", reader.document(hits[0].doc).get("field")) self.assertEqual("Lucene", reader.document(hits[1].doc).get("field")) self.assertEqual("Lucenne", reader.document(hits[2].doc).get("field")) def testGiga(self): w = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)) self._addDoc("Lucene in Action", w) self._addDoc("Lucene for Dummies", w) self._addDoc("Giga byte", w) self._addDoc("ManagingGigabytesManagingGigabyte", w) self._addDoc("ManagingGigabytesManagingGigabytes", w) self._addDoc("The Art of Computer Science", w) self._addDoc("J. K. Rowling", w) self._addDoc("JK Rowling", w) self._addDoc("Joanne K Roling", w) self._addDoc("Bruce Willis", w) self._addDoc("Willis bruce", w) self._addDoc("Brute willis", w) self._addDoc("B. willis", w) r = w.getReader() w.close() q = FuzzyQuery(Term("field", "giga"), 0) searcher = self.getSearcher(reader=r) hits = searcher.search(q, 10).scoreDocs self.assertEqual(1, len(hits)) self.assertEqual("Giga byte", searcher.doc(hits[0].doc).get("field")) def testDistanceAsEditsSearching(self): w = self.getWriter() self._addDoc("foobar", w) self._addDoc("test", w) self._addDoc("working", w) reader = w.getReader() searcher = self.getSearcher(reader=reader) w.close() q = FuzzyQuery(Term("field", "fouba"), 2) hits = searcher.search(q, 10).scoreDocs self.assertEqual(1, len(hits)) self.assertEqual("foobar", searcher.doc(hits[0].doc).get("field")) q = FuzzyQuery(Term("field", "foubara"), 2) hits = searcher.search(q, 10).scoreDocs self.assertEqual(1, len(hits)) self.assertEqual("foobar", searcher.doc(hits[0].doc).get("field")) try: q = FuzzyQuery(Term("field", "t"), 3) self.fail() except JavaError, e: #expected pass if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_Highlighter.py000644 000765 000000 00000014261 12162654000 020630 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from java.io import StringReader from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.queryparser.classic import QueryParser from org.apache.lucene.search.highlight import \ Highlighter, QueryScorer, SimpleFragmenter from org.apache.lucene.util import Version from org.apache.pylucene.search.highlight import PythonFormatter class TestFormatter(PythonFormatter): def __init__(self, testCase): super(TestFormatter, self).__init__() self.testCase = testCase def highlightTerm(self, originalText, group): if group.getTotalScore() <= 0: return originalText; self.testCase.countHighlightTerm() return "" + originalText + "" class HighlighterTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene. 2004 by Yura Smolsky ;) """ FIELD_NAME = "contents" texts = [ "A wicked problem is one for which each attempt to create a solution changes the understanding of the problem. Wicked problems cannot be solved in a traditional linear fashion, because the problem definition evolves as new possible solutions are considered and/or implemented." "Wicked problems always occur in a social context -- the wickedness of the problem reflects the diversity among the stakeholders in the problem." "From http://cognexus.org/id42.htm" "Most projects in organizations -- and virtually all technology-related projects these days -- are about wicked problems. Indeed, it is the social complexity of these problems, not their technical complexity, that overwhelms most current problem solving and project management approaches." "This text has a typo in referring to whicked problems" ]; def __init__(self, *args): super(HighlighterTestCase, self).__init__(*args) self.parser = QueryParser(Version.LUCENE_CURRENT, self.FIELD_NAME, StandardAnalyzer(Version.LUCENE_CURRENT)) def setUp(self): super(HighlighterTestCase, self).setUp() self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) writer = self.getWriter(analyzer=self.analyzer) for text in self.texts: self.addDoc(writer, text) writer.commit() writer.close() self.reader = self.getReader() self.numHighlights = 0; def testSimpleHighlighter(self): self.doSearching("Wicked") highlighter = Highlighter(QueryScorer(self.query)) highlighter.setTextFragmenter(SimpleFragmenter(40)) maxNumFragmentsRequired = 2 for scoreDoc in self.scoreDocs: text = self.searcher.doc(scoreDoc.doc).get(self.FIELD_NAME) tokenStream = self.analyzer.tokenStream(self.FIELD_NAME, StringReader(text)) result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, "...") print "\t", result # Not sure we can assert anything here - just running to check we don't # throw any exceptions def testGetBestFragmentsSimpleQuery(self): self.doSearching("Wicked") self.doStandardHighlights() self.assert_(self.numHighlights == 3, ("Failed to find correct number of highlights, %d found" %(self.numHighlights))) def doSearching(self, queryString): self.searcher = self.getSearcher() self.query = self.parser.parse(queryString) # for any multi-term queries to work (prefix, wildcard, range, # fuzzy etc) you must use a rewritten query! 
self.query = self.query.rewrite(self.reader) print "Searching for:", self.query.toString(self.FIELD_NAME) self.scoreDocs = self.searcher.search(self.query, 100).scoreDocs self.numHighlights = 0 def doStandardHighlights(self): formatter = TestFormatter(self) highlighter = Highlighter(formatter, QueryScorer(self.query)) highlighter.setTextFragmenter(SimpleFragmenter(20)) for scoreDoc in self.scoreDocs: text = self.searcher.doc(scoreDoc.doc).get(self.FIELD_NAME) maxNumFragmentsRequired = 2 fragmentSeparator = "..." tokenStream = self.analyzer.tokenStream(self.FIELD_NAME, StringReader(text)) result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator) print "\t", result def countHighlightTerm(self): self.numHighlights += 1 # update stats used in assertions def addDoc(self, writer, text): d = Document() f = Field(self.FIELD_NAME, text, TextField.TYPE_STORED) d.add(f) writer.addDocument(d) if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_ICUFoldingFilter.py000644 000765 000000 00000006566 12162654000 021474 0ustar00vajdawheel000000 000000 # -*- coding: utf-8 -*- # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
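# A hedged condensation of the highlighting flow above into one
# helper: analyzer, searcher, rewritten query and hits come from the
# caller, exactly as in doStandardHighlights.
from java.io import StringReader
from org.apache.lucene.search.highlight import \
    Highlighter, QueryScorer, SimpleFragmenter

def best_fragments(searcher, analyzer, query, scoreDocs, field,
                   maxNumFragmentsRequired=2, separator="..."):
    highlighter = Highlighter(QueryScorer(query))
    highlighter.setTextFragmenter(SimpleFragmenter(40))
    results = []
    for scoreDoc in scoreDocs:
        text = searcher.doc(scoreDoc.doc).get(field)
        tokenStream = analyzer.tokenStream(field, StringReader(text))
        results.append(highlighter.getBestFragments(
            tokenStream, text, maxNumFragmentsRequired, separator))
    return results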
# ==================================================================== # # Port of java/org/apache/lucene/analysis/icu/ICUFoldingFilter.java # using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org) try: from icu import Normalizer2, UNormalizationMode2 except ImportError, e: pass import sys, lucene, unittest from BaseTokenStreamTestCase import BaseTokenStreamTestCase from org.apache.lucene.analysis import Analyzer from org.apache.lucene.util import Version from org.apache.lucene.analysis.core import WhitespaceTokenizer from org.apache.pylucene.analysis import PythonAnalyzer class TestICUFoldingFilter(BaseTokenStreamTestCase): def testDefaults(self): from lucene.ICUFoldingFilter import ICUFoldingFilter class _analyzer(PythonAnalyzer): def createComponents(_self, fieldName, reader): source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) return Analyzer.TokenStreamComponents(source, ICUFoldingFilter(source)) a = _analyzer() # case folding self._assertAnalyzesTo(a, "This is a test", [ "this", "is", "a", "test" ]) # case folding self._assertAnalyzesTo(a, u"Ruß", [ "russ" ]) # case folding with accent removal self._assertAnalyzesTo(a, u"ΜΆΪΟΣ", [ u"μαιοσ" ]) self._assertAnalyzesTo(a, u"Μάϊος", [ u"μαιοσ" ]) # supplementary case folding self._assertAnalyzesTo(a, u"𐐖", [ u"𐐾" ]) # normalization self._assertAnalyzesTo(a, u"ﴳﴺﰧ", [ u"طمطمطم" ]) # removal of default ignorables self._assertAnalyzesTo(a, u"क्‍ष", [ u"कष" ]) # removal of latin accents (composed) self._assertAnalyzesTo(a, u"résumé", [ "resume" ]) # removal of latin accents (decomposed) self._assertAnalyzesTo(a, u"re\u0301sume\u0301", [ u"resume" ]) # fold native digits self._assertAnalyzesTo(a, u"৭০৬", [ "706" ]) # ascii-folding-filter type stuff self._assertAnalyzesTo(a, u"đis is cræzy", [ "dis", "is", "craezy" ]) if __name__ == "__main__": try: import icu except ImportError: pass else: if icu.ICU_VERSION >= '49': lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() else: print >>sys.stderr, "ICU version >= 49 is required, running:", icu.ICU_VERSION pylucene-4.10.1-1/test/test_ICUNormalizer2Filter.py000644 000765 000000 00000006524 12162654000 022310 0ustar00vajdawheel000000 000000 # -*- coding: utf-8 -*- # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
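# The _analyzer class above is the general PyLucene 4.x recipe for a
# custom Analyzer. Here is the same skeleton as a hedged standalone
# sketch, pairing a whitespace tokenizer with the stock
# LowerCaseFilter instead of the ICU filter; the class name is
# illustrative and the VM is assumed initialized.
from org.apache.lucene.analysis import Analyzer
from org.apache.lucene.analysis.core import \
    LowerCaseFilter, WhitespaceTokenizer
from org.apache.lucene.util import Version
from org.apache.pylucene.analysis import PythonAnalyzer

class LowercasingAnalyzer(PythonAnalyzer):
    def createComponents(self, fieldName, reader):
        source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
        stream = LowerCaseFilter(Version.LUCENE_CURRENT, source)
        return Analyzer.TokenStreamComponents(source, stream)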
# ==================================================================== # # Port of java/org/apache/lucene/analysis/icu/ICUNormalizer2Filter.java # using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org) try: from icu import Normalizer2, UNormalizationMode2 except ImportError, e: pass import sys, lucene, unittest from BaseTokenStreamTestCase import BaseTokenStreamTestCase from org.apache.lucene.analysis import Analyzer from org.apache.lucene.analysis.core import WhitespaceTokenizer from org.apache.lucene.util import Version from org.apache.pylucene.analysis import PythonAnalyzer class TestICUNormalizer2Filter(BaseTokenStreamTestCase): def testDefaults(self): from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter class _analyzer(PythonAnalyzer): def createComponents(_self, fieldName, reader): source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) return Analyzer.TokenStreamComponents(source, ICUNormalizer2Filter(source)) a = _analyzer() # case folding self._assertAnalyzesTo(a, "This is a test", [ "this", "is", "a", "test" ]) # case folding self._assertAnalyzesTo(a, "Ruß", [ "russ" ]) # case folding self._assertAnalyzesTo(a, u"ΜΆΪΟΣ", [ u"μάϊοσ" ]) self._assertAnalyzesTo(a, u"Μάϊος", [ u"μάϊοσ" ]) # supplementary case folding self._assertAnalyzesTo(a, u"𐐖", [ u"𐐾" ]) # normalization self._assertAnalyzesTo(a, u"ﴳﴺﰧ", [ u"طمطمطم" ]) # removal of default ignorables self._assertAnalyzesTo(a, u"क्‍ष", [ u"क्ष" ]) def testAlternate(self): from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter class analyzer(PythonAnalyzer): # specify nfc with decompose to get nfd def tokenStream(_self, fieldName, reader): return ICUNormalizer2Filter(WhitespaceTokenizer(Version.LUCENE_CURRENT, reader), Normalizer2.getInstance(None, "nfc", UNormalizationMode2.DECOMPOSE)) a = analyzer() # decompose EAcute into E + combining Acute self._assertAnalyzesTo(a, u"\u00E9", [ u"\u0065\u0301" ]) if __name__ == "__main__": try: import icu except ImportError: pass else: lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_ICUTransformFilter.py000644 000765 000000 00000007141 12162654000 022053 0ustar00vajdawheel000000 000000 # -*- coding: utf-8 -*- # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
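# What the wrapped filter above delegates to, shown directly through
# PyICU as a hedged two-liner (requires the icu package at runtime, no
# Lucene VM needed).
from icu import Normalizer2, UNormalizationMode2

nfc = Normalizer2.getInstance(None, "nfc", UNormalizationMode2.COMPOSE)
assert nfc.normalize(u"e\u0301") == u"\u00e9"  # e + combining acute -> e-acute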
# ==================================================================== # # Port of java/org/apache/lucene/analysis/icu/ICUTransformFilter.java # using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org) try: from icu import Transliterator, UTransDirection except ImportError, e: pass import sys, lucene, unittest from BaseTokenStreamTestCase import BaseTokenStreamTestCase from java.io import StringReader from org.apache.lucene.util import Version from org.apache.lucene.analysis.core import KeywordTokenizer from org.apache.pylucene.analysis import PythonTokenFilter class TestICUTransformFilter(BaseTokenStreamTestCase): def _checkToken(self, transform, input, expected): from lucene.ICUTransformFilter import ICUTransformFilter ts = ICUTransformFilter(KeywordTokenizer(StringReader(input)), transform) self._assertTokenStreamContents(ts, [ expected ]) def _getTransliterator(self, name): return Transliterator.createInstance(name, UTransDirection.FORWARD) def testBasicFunctionality(self): self._checkToken(self._getTransliterator("Traditional-Simplified"), u"簡化字", u"简化字") self._checkToken(self._getTransliterator("Katakana-Hiragana"), u"ヒラガナ", u"ひらがな") self._checkToken(self._getTransliterator("Fullwidth-Halfwidth"), u"アルアノリウ", u"アルアノリウ") self._checkToken(self._getTransliterator("Any-Latin"), u"Αλφαβητικός Κατάλογος", u"Alphabētikós Katálogos") self._checkToken(self._getTransliterator("NFD; [:Nonspacing Mark:] Remove"), u"Alphabētikós Katálogos", u"Alphabetikos Katalogos") self._checkToken(self._getTransliterator("Han-Latin"), u"中国", u"zhōng guó") def testCustomFunctionality(self): # convert a's to b's and b's to c's rules = "a > b; b > c;" self._checkToken(Transliterator.createFromRules("test", rules, UTransDirection.FORWARD), "abacadaba", "bcbcbdbcb") def testCustomFunctionality2(self): # convert a's to b's and b's to c's rules = "c { a > b; a > d;" self._checkToken(Transliterator.createFromRules("test", rules, UTransDirection.FORWARD), "caa", "cbd") def testOptimizer2(self): self._checkToken(self._getTransliterator("Traditional-Simplified; Lower"), "ABCDE", "abcde") if __name__ == "__main__": try: import icu except ImportError: pass else: lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_IndexDeletionPolicy.py000644 000765 000000 00000005667 12203701624 022320 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
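# The PyICU transliteration the filter tests above build on, shown
# directly as a hedged sketch (requires the icu package at runtime, no
# Lucene VM needed).
from icu import Transliterator, UTransDirection

trans = Transliterator.createInstance("Katakana-Hiragana",
                                      UTransDirection.FORWARD)
assert trans.transliterate(u"ヒラガナ") == u"ひらがな"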
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.core import WhitespaceAnalyzer from org.apache.lucene.document import Document from org.apache.lucene.index import DirectoryReader, IndexWriterConfig from org.apache.pylucene.index import PythonIndexDeletionPolicy class MyDeletionPolicy(PythonIndexDeletionPolicy): onInitCalled = False onCommitCalled = False def onInit(self, commits): self.onInitCalled = True def onCommit(self, commits): self.onCommitCalled = True class IndexDeletionPolicyTestCase(PyLuceneTestCase): def getConfig(self, analyzer): self.policy = MyDeletionPolicy() config = IndexWriterConfig(self.TEST_VERSION, analyzer) config.setIndexDeletionPolicy(self.policy) return config def testIndexDeletionPolicy(self): writer = self.getWriter() # no commits exist in the index yet self.assertTrue(self.policy.onInitCalled) # we haven't called commit yet self.assertFalse(self.policy.onCommitCalled) doc = Document() writer.addDocument(doc) writer.commit() # now we called commit self.assertTrue(self.policy.onCommitCalled) # external IR sees 1 commit: self.assertEquals(1, DirectoryReader.listCommits(self.directory).size()) # commit again: writer.addDocument(doc) writer.commit() # external IR sees 2 commits: self.assertEquals(2, DirectoryReader.listCommits(self.directory).size()) writer.close() # open same index, make sure both commits survived: writer = self.getWriter() self.assertTrue(self.policy.onInitCalled) self.assertFalse(self.policy.onCommitCalled) self.assertEquals(2, DirectoryReader.listCommits(self.directory).size()) writer.close() # 3 from closing writer again self.assertEquals(3, DirectoryReader.listCommits(self.directory).size()) if __name__ == "__main__": lucene.initVM() if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_Not.py000644 000765 000000 00000003632 12162654000 017132 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
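# MyDeletionPolicy above only observes commits; as a hedged sketch of
# a policy that acts on them, keep nothing but the newest commit.
# Illustrative only -- not what the test verifies -- and it assumes
# the commits list is ordered oldest to newest, as Lucene documents.
from org.apache.pylucene.index import PythonIndexDeletionPolicy

class KeepOnlyLastCommit(PythonIndexDeletionPolicy):
    def onInit(self, commits):
        self.onCommit(commits)

    def onCommit(self, commits):
        # drop every commit point except the most recent one
        for i in xrange(commits.size() - 1):
            commits.get(i).delete()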
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.core import SimpleAnalyzer from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.queryparser.classic import QueryParser from org.apache.lucene.util import Version class NotTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def testNot(self): writer = self.getWriter(analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT)) d1 = Document() d1.add(Field("field", "a b", TextField.TYPE_STORED)) writer.addDocument(d1) writer.commit() writer.close() searcher = self.getSearcher() query = QueryParser(Version.LUCENE_CURRENT, "field", SimpleAnalyzer(Version.LUCENE_CURRENT)).parse("a NOT b") topDocs = searcher.search(query, 50) self.assertEqual(0, topDocs.totalHits) if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_PerFieldAnalyzerWrapper.py000644 000765 000000 00000004610 12162654000 023130 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
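# --------------------------------------------------------------------
# For illustration, a minimal helper sketch of the reset() /
# incrementToken() / CharTermAttribute pattern the test below relies on
# to read an analyzer's output; the _analyzed name is an assumption.

def _analyzed(analyzer, field, text):
    from java.io import StringReader
    from org.apache.lucene.analysis.tokenattributes import CharTermAttribute

    stream = analyzer.tokenStream(field, StringReader(text))
    stream.reset()
    termAtt = stream.getAttribute(CharTermAttribute.class_)
    terms = []
    while stream.incrementToken():
        terms.append(termAtt.toString())
    stream.end()
    stream.close()
    return terms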
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from java.io import StringReader from java.util import HashMap from org.apache.lucene.analysis.core import SimpleAnalyzer, WhitespaceAnalyzer from org.apache.lucene.analysis.miscellaneous import PerFieldAnalyzerWrapper from org.apache.lucene.analysis.tokenattributes import CharTermAttribute from org.apache.lucene.util import Version class PerFieldAnalyzerTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def testPerField(self): perField = HashMap() perField.put("special", SimpleAnalyzer(Version.LUCENE_CURRENT)) analyzer = PerFieldAnalyzerWrapper(WhitespaceAnalyzer(Version.LUCENE_CURRENT), perField) text = "Qwerty" tokenStream = analyzer.tokenStream("field", StringReader(text)) tokenStream.reset() termAtt = tokenStream.getAttribute(CharTermAttribute.class_) self.assert_(tokenStream.incrementToken()) self.assertEqual("Qwerty", termAtt.toString(), "WhitespaceAnalyzer does not lowercase") tokenStream = analyzer.tokenStream("special", StringReader(text)) tokenStream.reset() termAtt = tokenStream.getAttribute(CharTermAttribute.class_) self.assert_(tokenStream.incrementToken()) self.assertEqual("qwerty", termAtt.toString(), "SimpleAnalyzer lowercases") if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_PhraseQuery.py000644 000765 000000 00000020754 12203701624 020647 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
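# --------------------------------------------------------------------
# For illustration, a hedged sketch of building the sloppy phrase
# queries the tests below use; slop is the total number of position
# moves tolerated when lining the phrase up against a document, so
# _phrase_sketch("field", ["one", "five"], slop=3) matches the indexed
# text "one two three four five" while slop=2 does not.  The helper
# name is an assumption.

def _phrase_sketch(field, words, slop=0):
    from org.apache.lucene.index import Term
    from org.apache.lucene.search import PhraseQuery

    query = PhraseQuery()
    query.setSlop(slop)
    for word in words:
        query.add(Term(field, word))
    return query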
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis import Analyzer from org.apache.lucene.analysis.core import LowerCaseTokenizer, StopAnalyzer from org.apache.lucene.analysis.tokenattributes import CharTermAttribute from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.index import Term from org.apache.lucene.search import \ BooleanClause, BooleanQuery, PhraseQuery, TermQuery from org.apache.lucene.util import Version from org.apache.pylucene.analysis import \ PythonAnalyzer, PythonFilteringTokenFilter class PhraseQueryTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def setUp(self): super(PhraseQueryTestCase, self).setUp() doc = Document() doc.add(Field("field", "one two three four five", TextField.TYPE_STORED)) writer = self.getWriter() writer.addDocument(doc) writer.close() self.searcher = self.getSearcher() self.query = PhraseQuery() def testNotCloseEnough(self): self.query.setSlop(2) self.query.add(Term("field", "one")) self.query.add(Term("field", "five")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(0, topDocs.totalHits) def testBarelyCloseEnough(self): self.query.setSlop(3) self.query.add(Term("field", "one")) self.query.add(Term("field", "five")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(1, topDocs.totalHits) def testExact(self): """ Ensures slop of 0 works for exact matches, but not reversed """ # slop is zero by default self.query.add(Term("field", "four")) self.query.add(Term("field", "five")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(1, topDocs.totalHits, "exact match") self.query = PhraseQuery() self.query.add(Term("field", "two")) self.query.add(Term("field", "one")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(0, topDocs.totalHits, "reverse not exact") def testSlop1(self): # Ensures slop of 1 works with terms in order. self.query.setSlop(1) self.query.add(Term("field", "one")) self.query.add(Term("field", "two")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(1, topDocs.totalHits, "in order") # Ensures slop of 1 does not work for phrases out of order # must be at least 2. 
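        # (with "one two three four five" indexed, matching the
        # reversed phrase "two one" means moving "one" from position 0
        # to position 2, an edit distance of 2, so the slop of 1 used
        # here cannot match it)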
self.query = PhraseQuery() self.query.setSlop(1) self.query.add(Term("field", "two")) self.query.add(Term("field", "one")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(0, topDocs.totalHits, "reversed, slop not 2 or more") def testOrderDoesntMatter(self): """ As long as slop is at least 2, terms can be reversed """ self.query.setSlop(2) # must be at least two for reverse order match self.query.add(Term("field", "two")) self.query.add(Term("field", "one")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(1, topDocs.totalHits, "just sloppy enough") self.query = PhraseQuery() self.query.setSlop(2) self.query.add(Term("field", "three")) self.query.add(Term("field", "one")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(0, topDocs.totalHits, "not sloppy enough") def testMultipleTerms(self): """ slop is the total number of positional moves allowed to line up a phrase """ self.query.setSlop(2) self.query.add(Term("field", "one")) self.query.add(Term("field", "three")) self.query.add(Term("field", "five")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(1, topDocs.totalHits, "two total moves") self.query = PhraseQuery() self.query.setSlop(5) # it takes six moves to match this phrase self.query.add(Term("field", "five")) self.query.add(Term("field", "three")) self.query.add(Term("field", "one")) topDocs = self.searcher.search(self.query, 50) self.assertEqual(0, topDocs.totalHits, "slop of 5 not close enough") self.query.setSlop(6) topDocs = self.searcher.search(self.query, 50) self.assertEqual(1, topDocs.totalHits, "slop of 6 just right") def testPhraseQueryWithStopAnalyzer(self): writer = self.getWriter(analyzer=StopAnalyzer(Version.LUCENE_CURRENT)) doc = Document() doc.add(Field("field", "the stop words are here", TextField.TYPE_STORED)) writer.addDocument(doc) writer.close() searcher = self.getSearcher() # valid exact phrase query query = PhraseQuery() query.add(Term("field", "stop")) query.add(Term("field", "words")) scoreDocs = searcher.search(query, None, 50).scoreDocs self.assertEqual(1, len(scoreDocs)) def testPhraseQueryInConjunctionScorer(self): writer = self.getWriter() doc = Document() doc.add(Field("source", "marketing info", TextField.TYPE_STORED)) writer.addDocument(doc) doc = Document() doc.add(Field("contents", "foobar", TextField.TYPE_STORED)) doc.add(Field("source", "marketing info", TextField.TYPE_STORED)) writer.addDocument(doc) writer.close() searcher = self.getSearcher() phraseQuery = PhraseQuery() phraseQuery.add(Term("source", "marketing")) phraseQuery.add(Term("source", "info")) topDocs = searcher.search(phraseQuery, 50) self.assertEqual(2, topDocs.totalHits) termQuery = TermQuery(Term("contents","foobar")) booleanQuery = BooleanQuery() booleanQuery.add(termQuery, BooleanClause.Occur.MUST) booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST) topDocs = searcher.search(booleanQuery, 50) self.assertEqual(1, topDocs.totalHits) writer = self.getWriter() doc = Document() doc.add(Field("contents", "map entry woo", TextField.TYPE_STORED)) writer.addDocument(doc) doc = Document() doc.add(Field("contents", "woo map entry", TextField.TYPE_STORED)) writer.addDocument(doc) doc = Document() doc.add(Field("contents", "map foobarword entry woo", TextField.TYPE_STORED)) writer.addDocument(doc) writer.close() searcher = self.getSearcher() termQuery = TermQuery(Term("contents", "woo")) phraseQuery = PhraseQuery() phraseQuery.add(Term("contents", "map")) phraseQuery.add(Term("contents", "entry")) topDocs = 
searcher.search(termQuery, 50) self.assertEqual(3, topDocs.totalHits) topDocs = searcher.search(phraseQuery, 50) self.assertEqual(2, topDocs.totalHits) booleanQuery = BooleanQuery() booleanQuery.add(termQuery, BooleanClause.Occur.MUST) booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST) topDocs = searcher.search(booleanQuery, 50) self.assertEqual(2, topDocs.totalHits) booleanQuery = BooleanQuery() booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST) booleanQuery.add(termQuery, BooleanClause.Occur.MUST) topDocs = searcher.search(booleanQuery, 50) self.assertEqual(2, topDocs.totalHits) if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_PositionIncrement.py000644 000765 000000 00000026067 12203701624 022053 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest from lucene import JArray from PyLuceneTestCase import PyLuceneTestCase from MultiSpansWrapper import MultiSpansWrapper from java.io import StringReader from org.apache.lucene.analysis import Analyzer from org.apache.lucene.analysis.core import \ LowerCaseTokenizer, WhitespaceTokenizer from org.apache.lucene.analysis.tokenattributes import \ CharTermAttribute, OffsetAttribute, PayloadAttribute, \ PositionIncrementAttribute from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.index import MultiFields, Term from org.apache.lucene.queryparser.classic import QueryParser from org.apache.lucene.search import MultiPhraseQuery, PhraseQuery from org.apache.lucene.search.payloads import PayloadSpanUtil from org.apache.lucene.search.spans import SpanNearQuery, SpanTermQuery from org.apache.lucene.util import BytesRef, Version from org.apache.pylucene.analysis import \ PythonAnalyzer, PythonFilteringTokenFilter, PythonTokenFilter, \ PythonTokenizer class PositionIncrementTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def testSetPosition(self): class _tokenizer(PythonTokenizer): def __init__(_self, reader): super(_tokenizer, _self).__init__(reader) _self.TOKENS = ["1", "2", "3", "4", "5"] _self.INCREMENTS = [1, 2, 1, 0, 1] _self.i = 0 _self.posIncrAtt = _self.addAttribute(PositionIncrementAttribute.class_) _self.termAtt = _self.addAttribute(CharTermAttribute.class_) _self.offsetAtt = _self.addAttribute(OffsetAttribute.class_) def incrementToken(_self): if _self.i == len(_self.TOKENS): return False _self.clearAttributes() _self.termAtt.append(_self.TOKENS[_self.i]) _self.offsetAtt.setOffset(_self.i, _self.i) _self.posIncrAtt.setPositionIncrement(_self.INCREMENTS[_self.i]) _self.i += 1 return True def end(_self): pass def reset(_self): pass def close(_self): pass class _analyzer(PythonAnalyzer): def createComponents(_self, fieldName, 
reader): return Analyzer.TokenStreamComponents(_tokenizer(reader)) writer = self.getWriter(analyzer=_analyzer()) d = Document() d.add(Field("field", "bogus", TextField.TYPE_STORED)) writer.addDocument(d) writer.commit() writer.close() searcher = self.getSearcher() reader = searcher.getIndexReader() pos = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "field", BytesRef("1")) pos.nextDoc() # first token should be at position 0 self.assertEqual(0, pos.nextPosition()) pos = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "field", BytesRef("2")) pos.nextDoc() # second token should be at position 2 self.assertEqual(2, pos.nextPosition()) q = PhraseQuery() q.add(Term("field", "1")) q.add(Term("field", "2")) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(0, len(hits)) # same as previous, just specify positions explicitly. q = PhraseQuery() q.add(Term("field", "1"), 0) q.add(Term("field", "2"), 1) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(0, len(hits)) # specifying correct positions should find the phrase. q = PhraseQuery() q.add(Term("field", "1"), 0) q.add(Term("field", "2"), 2) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(1, len(hits)) q = PhraseQuery() q.add(Term("field", "2")) q.add(Term("field", "3")) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(1, len(hits)) q = PhraseQuery() q.add(Term("field", "3")) q.add(Term("field", "4")) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(0, len(hits)) # phrase query would find it when correct positions are specified. q = PhraseQuery() q.add(Term("field", "3"), 0) q.add(Term("field", "4"), 0) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(1, len(hits)) # phrase query should fail for a non-existing searched term # even if other searched terms exist in the same searched # position. q = PhraseQuery() q.add(Term("field", "3"), 0) q.add(Term("field", "9"), 0) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(0, len(hits)) # multi-phrase query should succeed for a non-existing searched term # because other searched terms exist in the same searched # position.
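        # (each MultiPhraseQuery.add() below supplies a list of
        # alternative terms for a single position, so the query matches
        # when any one of them, here "3" or "9", occurs at that
        # position)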
mq = MultiPhraseQuery() mq.add([Term("field", "3"), Term("field", "9")], 0) hits = searcher.search(mq, None, 1000).scoreDocs self.assertEqual(1, len(hits)) q = PhraseQuery() q.add(Term("field", "2")) q.add(Term("field", "4")) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(1, len(hits)) q = PhraseQuery() q.add(Term("field", "3")) q.add(Term("field", "5")) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(1, len(hits)) q = PhraseQuery() q.add(Term("field", "4")) q.add(Term("field", "5")) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(1, len(hits)) q = PhraseQuery() q.add(Term("field", "2")) q.add(Term("field", "5")) hits = searcher.search(q, None, 1000).scoreDocs self.assertEqual(0, len(hits)) def testPayloadsPos0(self): writer = self.getWriter(analyzer=TestPayloadAnalyzer()) doc = Document() doc.add(Field("content", "a a b c d e a f g h i j a b k k", TextField.TYPE_STORED)) writer.addDocument(doc) reader = writer.getReader() writer.close() tp = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "content", BytesRef("a")) count = 0 self.assert_(tp.nextDoc() != tp.NO_MORE_DOCS) # "a" occurs 4 times self.assertEqual(4, tp.freq()) expected = 0 self.assertEqual(expected, tp.nextPosition()) self.assertEqual(1, tp.nextPosition()) self.assertEqual(3, tp.nextPosition()) self.assertEqual(6, tp.nextPosition()) # only one doc has "a" self.assert_(tp.nextDoc() == tp.NO_MORE_DOCS) searcher = self.getSearcher(reader=reader) stq1 = SpanTermQuery(Term("content", "a")) stq2 = SpanTermQuery(Term("content", "k")) sqs = [stq1, stq2] snq = SpanNearQuery(sqs, 30, False) count = 0 sawZero = False pspans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq) while pspans.next(): payloads = pspans.getPayload() sawZero |= pspans.start() == 0 it = payloads.iterator() while it.hasNext(): count += 1 it.next() self.assertEqual(5, count) self.assert_(sawZero) spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq) count = 0 sawZero = False while spans.next(): count += 1 sawZero |= spans.start() == 0 self.assertEqual(4, count) self.assert_(sawZero) sawZero = False psu = PayloadSpanUtil(searcher.getTopReaderContext()) pls = psu.getPayloadsForQuery(snq) count = pls.size() it = pls.iterator() while it.hasNext(): bytes = JArray('byte').cast_(it.next()) s = bytes.string_ sawZero |= s == "pos: 0" self.assertEqual(5, count) self.assert_(sawZero) class StopWhitespaceAnalyzer(PythonAnalyzer): def __init__(self, enablePositionIncrements): super(StopWhitespaceAnalyzer, self).__init__() self.enablePositionIncrements = enablePositionIncrements def createComponents(self, fieldName, reader): class _stopFilter(PythonFilteringTokenFilter): def __init__(_self, tokenStream): super(_stopFilter, _self).__init__(Version.LUCENE_CURRENT, tokenStream) _self.termAtt = _self.addAttribute(CharTermAttribute.class_); def accept(_self): return _self.termAtt.toString() != "stop" source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) return Analyzer.TokenStreamComponents(source, _stopFilter(source)) class TestPayloadAnalyzer(PythonAnalyzer): def createComponents(self, fieldName, reader): source = LowerCaseTokenizer(Version.LUCENE_CURRENT, reader) return Analyzer.TokenStreamComponents(source, PayloadFilter(source, fieldName)) class PayloadFilter(PythonTokenFilter): def __init__(self, input, fieldName): super(PayloadFilter, self).__init__(input) self.input = input self.fieldName = fieldName self.pos = 0 self.i = 0 self.posIncrAttr = 
input.addAttribute(PositionIncrementAttribute.class_) self.payloadAttr = input.addAttribute(PayloadAttribute.class_) self.termAttr = input.addAttribute(CharTermAttribute.class_) def incrementToken(self): if self.input.incrementToken(): bytes = JArray('byte')("pos: %d" %(self.pos)) self.payloadAttr.setPayload(BytesRef(bytes)) if self.pos == 0 or self.i % 2 == 1: posIncr = 1 else: posIncr = 0 self.posIncrAttr.setPositionIncrement(posIncr) self.pos += posIncr self.i += 1 return True return False if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_PrefixFilter.py000644 000765 000000 00000010171 12162654000 020771 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.document import Document, Field, StringField from org.apache.lucene.index import Term from org.apache.lucene.search import ConstantScoreQuery, PrefixFilter class PrefixFilterTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def testPrefixFilter(self): writer = self.getWriter() categories = ["/Computers/Linux", "/Computers/Mac/One", "/Computers/Mac/Two", "/Computers/Windows"] for category in categories: doc = Document() doc.add(Field("category", category, StringField.TYPE_STORED)) writer.addDocument(doc) writer.close() # PrefixFilter combined with ConstantScoreQuery filter = PrefixFilter(Term("category", "/Computers")) query = ConstantScoreQuery(filter) searcher = self.getSearcher() topDocs = searcher.search(query, 50) self.assertEqual(4, topDocs.totalHits, "All documents in /Computers category and below") # test middle of values filter = PrefixFilter(Term("category", "/Computers/Mac")) query = ConstantScoreQuery(filter) topDocs = searcher.search(query, 50) self.assertEqual(2, topDocs.totalHits, "Two in /Computers/Mac") # test start of values filter = PrefixFilter(Term("category", "/Computers/Linux")) query = ConstantScoreQuery(filter) topDocs = searcher.search(query, 50) self.assertEqual(1, topDocs.totalHits, "One in /Computers/Linux") # test end of values filter = PrefixFilter(Term("category", "/Computers/Windows")) query = ConstantScoreQuery(filter) topDocs = searcher.search(query, 50) self.assertEqual(1, topDocs.totalHits, "One in /Computers/Windows") # test non-existent filter = PrefixFilter(Term("category", "/Computers/ObsoleteOS")) query = ConstantScoreQuery(filter) topDocs = searcher.search(query, 50) self.assertEqual(0, topDocs.totalHits, "no documents") # test non-existent, before values filter = PrefixFilter(Term("category", "/Computers/AAA")) query = ConstantScoreQuery(filter) topDocs = searcher.search(query, 50) self.assertEqual(0, topDocs.totalHits, "no documents") # test non-existent, 
after values filter = PrefixFilter(Term("category", "/Computers/ZZZ")) query = ConstantScoreQuery(filter) topDocs = searcher.search(query, 50) self.assertEqual(0, topDocs.totalHits, "no documents") # test zero-length prefix filter = PrefixFilter(Term("category", "")) query = ConstantScoreQuery(filter) topDocs = searcher.search(query, 50) self.assertEqual(4, topDocs.totalHits, "all documents") # test non-existent field filter = PrefixFilter(Term("nonexistantfield", "/Computers")) query = ConstantScoreQuery(filter) topDocs = searcher.search(query, 50) self.assertEqual(0, topDocs.totalHits, "no documents") if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_PrefixQuery.py000644 000765 000000 00000004112 12162654000 020650 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.document import Document, Field, StringField from org.apache.lucene.index import Term from org.apache.lucene.search import PrefixQuery class PrefixQueryTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def testPrefixQuery(self): writer = self.getWriter() categories = ["/Computers", "/Computers/Mac", "/Computers/Windows"] for category in categories: doc = Document() doc.add(Field("category", category, StringField.TYPE_STORED)) writer.addDocument(doc) writer.close() query = PrefixQuery(Term("category", "/Computers")) searcher = self.getSearcher() topDocs = searcher.search(query, 50) self.assertEqual(3, topDocs.totalHits, "All documents in /Computers category and below") query = PrefixQuery(Term("category", "/Computers/Mac")) topDocs = searcher.search(query, 50) self.assertEqual(1, topDocs.totalHits, "One in /Computers/Mac") if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_PyLucene.py000644 000765 000000 00000027340 12356527510 020122 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
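# --------------------------------------------------------------------
# For illustration, a hedged sketch of the filter-as-query pattern from
# the prefix tests above: a PrefixFilter only selects documents, so it
# is wrapped in a ConstantScoreQuery before being handed to
# IndexSearcher.search().  The helper name is an assumption.

def _prefix_count(searcher, field, prefix):
    from org.apache.lucene.index import Term
    from org.apache.lucene.search import ConstantScoreQuery, PrefixFilter

    query = ConstantScoreQuery(PrefixFilter(Term(field, prefix)))
    return searcher.search(query, 50).totalHits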
# ==================================================================== import sys, lucene, unittest import os, shutil from java.io import File, StringReader from org.apache.lucene.analysis.core import WhitespaceAnalyzer from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.document import \ Document, Field, StoredField, StringField, TextField from org.apache.lucene.index import \ IndexWriter, IndexWriterConfig, DirectoryReader, MultiFields, Term from org.apache.lucene.queryparser.classic import \ MultiFieldQueryParser, QueryParser from org.apache.lucene.search import BooleanClause, IndexSearcher, TermQuery from org.apache.lucene.store import MMapDirectory, SimpleFSDirectory from org.apache.lucene.util import BytesRefIterator, Version class Test_PyLuceneBase(object): def getAnalyzer(self): return StandardAnalyzer(Version.LUCENE_CURRENT) def openStore(self): raise NotImplementedError def closeStore(self, store, *args): pass def getWriter(self, store, analyzer=None, create=False): if analyzer is None: analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT) analyzer = LimitTokenCountAnalyzer(analyzer, 10000) config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer) if create: config.setOpenMode(IndexWriterConfig.OpenMode.CREATE) writer = IndexWriter(store, config) return writer def getReader(self, store, analyzer): pass def getSearcher(self, store): return IndexSearcher(DirectoryReader.open(store)) def test_indexDocument(self): store = self.openStore() writer = None try: analyzer = self.getAnalyzer() writer = self.getWriter(store, analyzer, True) doc = Document() doc.add(Field("title", "value of testing", TextField.TYPE_STORED)) doc.add(Field("docid", str(1), StringField.TYPE_NOT_STORED)) doc.add(Field("owner", "unittester", StringField.TYPE_STORED)) doc.add(Field("search_name", "wisdom", StoredField.TYPE)) doc.add(Field("meta_words", "rabbits are beautiful", TextField.TYPE_NOT_STORED)) writer.addDocument(doc) finally: self.closeStore(store, writer) def test_indexDocumentWithText(self): store = self.openStore() writer = None try: analyzer = self.getAnalyzer() writer = self.getWriter(store, analyzer, True) doc = Document() doc.add(Field("title", "value of testing", TextField.TYPE_STORED)) doc.add(Field("docid", str(1), StringField.TYPE_NOT_STORED)) doc.add(Field("owner", "unittester", StringField.TYPE_STORED)) doc.add(Field("search_name", "wisdom", StoredField.TYPE)) doc.add(Field("meta_words", "rabbits are beautiful", TextField.TYPE_NOT_STORED)) body_text = "hello world" * 20 body_reader = StringReader(body_text) doc.add(Field("content", body_reader)) writer.addDocument(doc) finally: self.closeStore(store, writer) def test_indexDocumentWithUnicodeText(self): store = self.openStore() writer = None try: analyzer = self.getAnalyzer() writer = self.getWriter(store, analyzer, True) doc = Document() doc.add(Field("title", "value of testing", TextField.TYPE_STORED)) doc.add(Field("docid", str(1), StringField.TYPE_NOT_STORED)) doc.add(Field("owner", "unittester", StringField.TYPE_STORED)) doc.add(Field("search_name", "wisdom", StoredField.TYPE)) doc.add(Field("meta_words", "rabbits are beautiful", TextField.TYPE_NOT_STORED)) # using a unicode body causes problems, which seems very odd # since the python type is the same regardless after doing # the encode body_text = u"hello world"*20 body_reader = StringReader(body_text) doc.add(Field("content", body_reader)) writer.addDocument(doc) finally: 
self.closeStore(store, writer) def test_searchDocuments(self): self.test_indexDocument() store = self.openStore() searcher = None try: searcher = self.getSearcher(store) query = QueryParser(Version.LUCENE_CURRENT, "title", self.getAnalyzer()).parse("value") topDocs = searcher.search(query, 50) self.assertEqual(topDocs.totalHits, 1) finally: self.closeStore(store) def test_searchDocumentsWithMultiField(self): """ Tests searching with MultiFieldQueryParser """ self.test_indexDocument() store = self.openStore() searcher = None try: searcher = self.getSearcher(store) SHOULD = BooleanClause.Occur.SHOULD query = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "value", ["title", "docid"], [SHOULD, SHOULD], self.getAnalyzer()) topDocs = searcher.search(query, 50) self.assertEquals(1, topDocs.totalHits) finally: self.closeStore(store) def test_removeDocument(self): self.test_indexDocument() store = self.openStore() searcher = None writer = None try: searcher = self.getSearcher(store) query = TermQuery(Term("docid", str(1))) topDocs = searcher.search(query, 50) self.assertEqual(topDocs.totalHits, 1) # be careful with ids they are ephemeral docid = topDocs.scoreDocs[0].doc writer = self.getWriter(store) writer.deleteDocuments(Term("docid", str(1))) finally: self.closeStore(store, writer) store = self.openStore() searcher = None try: searcher = self.getSearcher(store) query = TermQuery(Term("docid", str(1))) topDocs = searcher.search(query, 50) self.assertEqual(topDocs.totalHits, 0) finally: self.closeStore(store) def test_removeDocuments(self): self.test_indexDocument() store = self.openStore() writer = None try: writer = self.getWriter(store) writer.deleteDocuments(Term('docid', str(1))) finally: self.closeStore(store, writer) store = self.openStore() searcher = None try: searcher = self.getSearcher(store) query = QueryParser(Version.LUCENE_CURRENT, "title", self.getAnalyzer()).parse("value") topDocs = searcher.search(query, 50) self.assertEqual(topDocs.totalHits, 0) finally: self.closeStore(store) def test_FieldEnumeration(self): self.test_indexDocument() store = self.openStore() writer = None try: analyzer = self.getAnalyzer() writer = self.getWriter(store, analyzer, False) doc = Document() doc.add(Field("title", "value of testing", TextField.TYPE_STORED)) doc.add(Field("docid", str(2), StringField.TYPE_NOT_STORED)) doc.add(Field("owner", "unittester", StringField.TYPE_STORED)) doc.add(Field("search_name", "wisdom", StoredField.TYPE)) doc.add(Field("meta_words", "rabbits are beautiful", TextField.TYPE_NOT_STORED)) writer.addDocument(doc) doc = Document() doc.add(Field("owner", "unittester", StringField.TYPE_NOT_STORED)) doc.add(Field("search_name", "wisdom", StoredField.TYPE)) doc.add(Field("meta_words", "rabbits are beautiful", TextField.TYPE_NOT_STORED)) writer.addDocument(doc) finally: self.closeStore(store, writer) store = self.openStore() reader = None try: reader = DirectoryReader.open(store) term_enum = MultiFields.getTerms(reader, "docid").iterator(None) docids = [term.utf8ToString() for term in BytesRefIterator.cast_(term_enum)] self.assertEqual(len(docids), 2) finally: self.closeStore(store, reader) def test_getFieldInfos(self): self.test_indexDocument() store = self.openStore() reader = None try: reader = DirectoryReader.open(store) fieldInfos = MultiFields.getMergedFieldInfos(reader) for fieldInfo in fieldInfos.iterator(): self.assert_(fieldInfo.name in ['owner', 'search_name', 'meta_words', 'docid', 'title']) if fieldInfo.isIndexed(): self.assert_(fieldInfo.name in ['owner', 
'meta_words', 'docid', 'title']) if fieldInfo.isIndexed() and not fieldInfo.hasVectors(): self.assert_(fieldInfo.name in ['owner', 'meta_words', 'docid', 'title']) finally: store = self.closeStore(store, reader) class Test_PyLuceneWithFSStore(unittest.TestCase, Test_PyLuceneBase): STORE_DIR = "testrepo" def setUp(self): if not os.path.exists(self.STORE_DIR): os.mkdir(self.STORE_DIR) def tearDown(self): if os.path.exists(self.STORE_DIR): shutil.rmtree(self.STORE_DIR) def openStore(self): return SimpleFSDirectory(File(self.STORE_DIR)) def closeStore(self, store, *args): for arg in args: if arg is not None: arg.close() store.close() class Test_PyLuceneWithMMapStore(Test_PyLuceneWithFSStore): def openStore(self): return MMapDirectory(File(self.STORE_DIR)) if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_PyLuceneThread.py000644 000765 000000 00000007720 12162654000 021250 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest import time, threading from lucene import getVMEnv from PyLuceneTestCase import PyLuceneTestCase from java.lang import Thread from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.index import Term from org.apache.lucene.search import PhraseQuery, TermQuery from org.apache.lucene.util import Version class PyLuceneThreadTestCase(PyLuceneTestCase): """ Test using threads in PyLucene with python threads """ def setUp(self): super(PyLuceneThreadTestCase, self).setUp() self.classLoader = Thread.currentThread().getContextClassLoader() writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)) doc1 = Document() doc2 = Document() doc3 = Document() doc4 = Document() doc1.add(Field("field", "one", TextField.TYPE_STORED)) doc2.add(Field("field", "two", TextField.TYPE_STORED)) doc3.add(Field("field", "three", TextField.TYPE_STORED)) doc4.add(Field("field", "one", TextField.TYPE_STORED)) writer.addDocument(doc1) writer.addDocument(doc2) writer.addDocument(doc3) writer.addDocument(doc4) writer.commit() writer.close() self.testData = [('one',2), ('two',1), ('three', 1), ('five', 0)] * 500 self.lock = threading.Lock() self.totalQueries = 0 def testWithMainThread(self): """ warm up test for runSearch in main thread """ self.runSearch(2000, True) def testWithPyLuceneThread(self): """ Run 5 threads with 2000 queries each """ threads = [] for i in xrange(5): threads.append(threading.Thread(target=self.runSearch, args=(2000,))) for thread in threads: thread.start() for thread in threads: thread.join() # we survived! 
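        # (each worker attached itself to the JVM with
        # getVMEnv().attachCurrentThread() in runSearch below, so no
        # thread deadlocked or crashed the VM)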
# and all queries have run successfully self.assertEqual(10000, self.totalQueries) def runSearch(self, runCount, mainThread=False): """ search for runCount number of times """ # problem: if there are any assertion errors in the child # thread, the calling thread is not notified and may still # consider the test case passed. We are using self.totalQueries # to double-check that work has actually been done. if not mainThread: getVMEnv().attachCurrentThread() time.sleep(0.5) searcher = self.getSearcher() try: self.query = PhraseQuery() for word, count in self.testData[0:runCount]: query = TermQuery(Term("field", word)) topDocs = searcher.search(query, 50) self.assertEqual(topDocs.totalHits, count) self.lock.acquire() self.totalQueries += 1 self.lock.release() finally: del searcher if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_PythonDirectory.py000644 000765 000000 00000021004 12356527510 021543 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest import os, shutil import test_PyLucene from binascii import crc32 from threading import RLock from lucene import JavaError, JArray from java.lang import String from java.io import IOException from org.apache.pylucene.store import \ PythonLock, PythonLockFactory, \ PythonIndexInput, PythonIndexOutput, PythonDirectory """ The Directory implementation here is for testing purposes only and is not meant as an example of writing one: it suffers from a lack of safety when dealing with concurrent modifications because it does away with the file locking of the default Lucene FSDirectory implementation. 
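Each class below overrides one of the org.apache.pylucene.store
extension points (PythonDirectory, PythonIndexInput, PythonIndexOutput,
PythonLock, PythonLockFactory), so the JVM calls back into Python for
every file operation the index performs; the python-side locking is
only safe within a single process, as noted on PythonDirLock.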
""" DEBUG = False class DebugWrapper(object): def __init__(self, obj): self.obj = obj def __getattr__(self, name): print self.obj.__class__.__name__, self.obj.name, name sys.stdout.flush() return getattr(self.obj, name) class DebugFactory(object): def __init__(self, klass): self.klass = klass def __call__(self, *args, **kw): instance = self.klass(*args, **kw) return DebugWrapper(instance) class PythonDirLock(PythonLock): # only safe for a single process def __init__(self, name, path, lock): super(PythonDirLock, self).__init__() self.name = name self.lock_file = path self.lock = lock def isLocked(self): return self.lock.locked() def obtain(self): return self.lock.acquire() def release(self): return self.lock.release() def close(self): if hasattr(self.lock, 'close'): self.lock.close() class PythonDirLockFactory(PythonLockFactory): def __init__(self, path): super(PythonDirLockFactory, self).__init__() self.path = path self._locks = {} def makeLock(self, name): lock = self._locks.get(name) if lock is None: lock = PythonDirLock(name, os.path.join(self.path, name), RLock()) self._locks[name] = lock return lock def clearLock(self, name): lock = self._locks.pop(name, None) if lock is not None: lock.release() class PythonFileStreamInput(PythonIndexInput): def __init__(self, name, fh, size, clone=False): if not clone: super(PythonFileStreamInput, self).__init__(name, size) self.name = name self.fh = fh self._length = size self.isOpen = True self.isClone = clone def length(self): return long(self._length) def clone(self): clone = PythonFileStreamInput(self.name, self.fh, self._length, True) return super(PythonFileStreamInput, self).clone(clone) def close(self): if self.isOpen: self.isOpen = False if not self.isClone: self.fh.close() def readInternal(self, length, pos): self.fh.seek(pos) return JArray('byte')(self.fh.read(length)) def seekInternal(self, pos): self.fh.seek(pos) class PythonFileStreamOutput(PythonIndexOutput): def __init__(self, name, fh): super(PythonFileStreamOutput, self).__init__() self.name = name self.fh = fh self.isOpen = True self._length = 0 self.crc = None def close(self): if self.isOpen: self.isOpen = False self.fh.flush() self.fh.close() def getFilePointer(self): return long(self._length) def getChecksum(self): return long(self.crc & 0xffffffff) def writeByte(self, b): if b < 0: data = chr(b + 256) else: data = chr(b) self.fh.write(data) self._length += 1 if self.crc is None: self.crc = crc32(data) else: self.crc = crc32(data, self.crc) def writeBytes(self, bytes): data = bytes.string_ self.fh.write(data) self.fh.flush() self._length += len(data) if self.crc is None: self.crc = crc32(data) else: self.crc = crc32(data, self.crc) class PythonFileDirectory(PythonDirectory): def __init__(self, path): super(PythonFileDirectory, self).__init__() self._lockFactory = PythonDirLockFactory(path) self.name = path assert os.path.isdir(path) self.path = path self._streams = [] def close(self): for stream in self._streams: stream.close() del self._streams[:] def createOutput(self, name, context): file_path = os.path.join(self.path, name) fh = open(file_path, "wb") stream = PythonFileStreamOutput(name, fh) self._streams.append(stream) return stream def deleteFile(self, name): if self.fileExists(name): os.unlink(os.path.join(self.path, name)) def fileExists(self, name): return os.path.exists(os.path.join(self.path, name)) def fileLength(self, name): file_path = os.path.join(self.path, name) return long(os.path.getsize(file_path)) def fileModified(self, name): file_path = 
os.path.join(self.path, name) return os.path.getmtime(file_path) def listAll(self): return os.listdir(self.path) def sync(self, name): pass def openInput(self, name, bufferSize=0): file_path = os.path.join(self.path, name) try: fh = open(file_path, "rb") except IOError: raise JavaError, IOException(name) stream = PythonFileStreamInput(name, fh, os.path.getsize(file_path)) self._streams.append(stream) return stream def touchFile(self, name): file_path = os.path.join(self.path, name) os.utime(file_path, None) def setLockFactory(self, lockFactory): pass def getLockFactory(self): return None def clearLock(self, name): self._lockFactory.clearLock(name) def makeLock(self, name): return self._lockFactory.makeLock(name) if DEBUG: _globals = globals() _globals['PythonFileDirectory'] = DebugFactory(PythonFileDirectory) _globals['PythonFileStreamInput'] = DebugFactory(PythonFileStreamInput) _globals['PythonFileStreamOutput'] = DebugFactory(PythonFileStreamOutput) _globals['PythonDirLock'] = DebugFactory(PythonDirLock) del _globals class PythonDirectoryTests(unittest.TestCase, test_PyLucene.Test_PyLuceneBase): STORE_DIR = "testpyrepo" def setUp(self): if not os.path.exists(self.STORE_DIR): os.mkdir(self.STORE_DIR) def tearDown(self): if os.path.exists(self.STORE_DIR): shutil.rmtree(self.STORE_DIR) def openStore(self): return PythonFileDirectory(self.STORE_DIR) def closeStore(self, store, *args): for arg in args: if arg is not None: arg.close() store.close() def test_IncrementalLoop(self): print "Testing Indexing Incremental Looping" for i in range(100): print "indexing ", i sys.stdout.flush() self.test_indexDocument() if __name__ == "__main__": env = lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass print 'inputs', env._dumpRefs(True).get('class org.osafoundation.lucene.store.PythonIndexInput', 0) print 'outputs', env._dumpRefs(True).get('class org.osafoundation.lucene.store.PythonIndexOutput', 0) print 'locks', env._dumpRefs(True).get('class org.osafoundation.lucene.store.PythonLock', 0) print 'dirs', env._dumpRefs(True).get('class org.osafoundation.lucene.store.PythonDirectory', 0) else: unittest.main() pylucene-4.10.1-1/test/test_PythonException.py000644 000765 000000 00000003622 12413103072 021526 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
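# --------------------------------------------------------------------
# For illustration, a hedged sketch of the behaviour the test below
# asserts: with a shared JCC runtime, a Python exception raised inside
# an extension class travels through the JVM and comes back unchanged,
# while in a non-shared runtime it surfaces wrapped in a
# lucene.JavaError.  The helper name is an assumption, and a started VM
# is assumed.

def _expected_exception_sketch(python_exc_class):
    import lucene
    if lucene.getVMEnv().isShared():
        return python_exc_class
    return lucene.JavaError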
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.util import Version from org.apache.pylucene.queryparser.classic import PythonQueryParser class PythonExceptionTestCase(PyLuceneTestCase): def testThroughLayerException(self): class TestException(Exception): pass class TestQueryParser(PythonQueryParser): def getFieldQuery_quoted(_self, field, queryText, quoted): raise TestException("TestException") qp = TestQueryParser(Version.LUCENE_CURRENT, 'all', StandardAnalyzer(Version.LUCENE_CURRENT)) if lucene.getVMEnv().isShared(): with self.assertRaises(TestException): qp.parse("foo bar") else: with self.assertRaises(lucene.JavaError): qp.parse("foo bar") if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_PythonQueryParser.py000644 000765 000000 00000006214 12162654000 022055 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.index import Term from org.apache.lucene.search import BooleanClause, TermQuery from org.apache.lucene.util import Version from org.apache.pylucene.queryparser.classic import \ PythonQueryParser, PythonMultiFieldQueryParser class BooleanTestMixin(object): def getBooleanQuery(self, clauses, disableCoord): extra_query = TermQuery(Term("all", "extra_clause")) extra_clause = BooleanClause(extra_query, BooleanClause.Occur.SHOULD) clauses.add(extra_clause) return super(BooleanTestMixin, self).getBooleanQuery(clauses, disableCoord) class PythonQueryParserTestCase(PyLuceneTestCase): def testOverrideBooleanQuery(self): class TestQueryParser(BooleanTestMixin, PythonQueryParser): def getFieldQuery_quoted(_self, field, queryText, quoted): return super(TestQueryParser, _self).getFieldQuery_quoted_super(field, queryText, quoted) qp = TestQueryParser(Version.LUCENE_CURRENT, 'all', StandardAnalyzer(Version.LUCENE_CURRENT)) q = qp.parse("foo bar") self.assertEquals(str(q), "all:foo all:bar all:extra_clause") class PythonMultiFieldQueryParserTestCase(PyLuceneTestCase): def testOverrideBooleanQuery(self): class TestQueryParser(BooleanTestMixin, PythonMultiFieldQueryParser): def getFieldQuery_quoted(_self, field, queryText, quoted): return super(TestQueryParser, _self).getFieldQuery_quoted_super(field, queryText, quoted) qp = TestQueryParser(Version.LUCENE_CURRENT, ['one', 'two'], StandardAnalyzer(Version.LUCENE_CURRENT)) q = qp.parse(Version.LUCENE_CURRENT, "foo bar", ['one', 'two'], [BooleanClause.Occur.SHOULD, 
BooleanClause.Occur.SHOULD], StandardAnalyzer(Version.LUCENE_CURRENT)) self.assertEquals(str(q), "(one:foo one:bar) (two:foo two:bar)") if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_RegexQuery.py000644 000765 000000 00000006076 12162654000 020477 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.core import SimpleAnalyzer from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.index import Term from org.apache.lucene.sandbox.queries.regex import RegexQuery from org.apache.lucene.search.spans import \ SpanMultiTermQueryWrapper, SpanNearQuery class TestRegexQuery(PyLuceneTestCase): FN = "field" def setUp(self): super(TestRegexQuery, self).setUp() writer = self.getWriter(analyzer=SimpleAnalyzer(self.TEST_VERSION)) doc = Document() doc.add(Field(self.FN, "the quick brown fox jumps over the lazy dog", TextField.TYPE_NOT_STORED)) writer.addDocument(doc) writer.commit() writer.close() self.searcher = self.getSearcher() def tearDown(self): del self.searcher super(TestRegexQuery, self).tearDown() def newTerm(self, value): return Term(self.FN, value) def regexQueryNrHits(self, regex): query = RegexQuery(self.newTerm(regex)) return self.searcher.search(query, 50).totalHits def spanRegexQueryNrHits(self, regex1, regex2, slop, ordered): srq1 = SpanMultiTermQueryWrapper(RegexQuery(self.newTerm(regex1))) srq2 = SpanMultiTermQueryWrapper(RegexQuery(self.newTerm(regex2))) query = SpanNearQuery([srq1, srq2], slop, ordered) return self.searcher.search(query, 50).totalHits def testRegex1(self): self.assertEqual(1, self.regexQueryNrHits("^q.[aeiou]c.*$")) def testRegex2(self): self.assertEqual(0, self.regexQueryNrHits("^.[aeiou]c.*$")) def testRegex3(self): self.assertEqual(0, self.regexQueryNrHits("^q.[aeiou]c$")) def testSpanRegex1(self): self.assertEqual(1, self.spanRegexQueryNrHits("^q.[aeiou]c.*$", "dog", 6, True)) def testSpanRegex2(self): self.assertEqual(0, self.spanRegexQueryNrHits("^q.[aeiou]c.*$", "dog", 5, True)) if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_RewriteQuery.py000644 000765 000000 00000003562 12162654000 021043 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== # Originally intended to demonstrate a memory leak. See # http://lists.osafoundation.org/pipermail/pylucene-dev/2008-October/002937.html # and followup import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.index import Term from org.apache.lucene.search import TermQuery from org.apache.lucene.util import Version class QueryRewriteTest(PyLuceneTestCase): def setUp(self): super(QueryRewriteTest, self).setUp() writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)) writer.close() self.reader = self.getReader() self.term = Term('all', 'foo') def testQuery(self): base_query = TermQuery(self.term) new_query = base_query.rewrite(self.reader) self.assertEquals(base_query, new_query) if __name__ == "__main__": env = lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_Similarity.py000644 000765 000000 00000010353 12162654000 020516 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
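# --------------------------------------------------------------------
# For illustration, a hedged sketch of a flat Python similarity in the
# style of SimpleSimilarity below; the same kind of instance is handed
# both to the writer (so norms are encoded with it) and to the searcher
# via setSimilarity() (so queries score with it).  _FlatSimilarity is
# an assumed name.

def _flat_similarity_sketch():
    from org.apache.lucene.search import Explanation
    from org.apache.pylucene.search.similarities import PythonDefaultSimilarity

    class _FlatSimilarity(PythonDefaultSimilarity):
        def queryNorm(self, sumOfSquaredWeights):
            return 1.0
        def coord(self, overlap, maxOverlap):
            return 1.0
        def lengthNorm(self, state):
            return 1.0  # ignore field length entirely
        def tf(self, freq):
            return 1.0  # every term frequency scores the same
        def sloppyFreq(self, distance):
            return 1.0
        def idf(self, docFreq, numDocs):
            return 1.0  # ignore how rare a term is
        def idfExplain(self, collectionStats, termStats):
            return Explanation(1.0, "flat")

    return _FlatSimilarity()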
# ==================================================================== import sys, lucene, unittest from PyLuceneTestCase import PyLuceneTestCase from org.apache.lucene.analysis.core import SimpleAnalyzer from org.apache.lucene.document import Document, Field, TextField from org.apache.lucene.index import Term from org.apache.lucene.search import \ BooleanClause, BooleanQuery, Explanation, PhraseQuery, TermQuery from org.apache.lucene.util import Version from org.apache.pylucene.search import PythonCollector from org.apache.pylucene.search.similarities import PythonDefaultSimilarity class SimpleSimilarity(PythonDefaultSimilarity): def queryNorm(self, sumOfSquaredWeights): return 1.0 def coord(self, overlap, maxOverlap): return 1.0 def lengthNorm(self, state): return state.getBoost() def tf(self, freq): return freq def sloppyFreq(self, distance): return 2.0 def idf(self, docFreq, numDocs): return 1.0 def idfExplain(self, collectionStats, termStats): return Explanation(1.0, "inexplicable") class SimilarityTestCase(PyLuceneTestCase): """ Unit tests ported from Java Lucene """ def testSimilarity(self): writer = self.getWriter(analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT), similarity=SimpleSimilarity()) d1 = Document() d1.add(Field("field", "a c", TextField.TYPE_STORED)) d2 = Document() d2.add(Field("field", "a b c", TextField.TYPE_STORED)) writer.addDocument(d1) writer.addDocument(d2) writer.commit() writer.close() searcher = self.getSearcher() searcher.setSimilarity(SimpleSimilarity()) a = Term("field", "a") b = Term("field", "b") c = Term("field", "c") class collector1(PythonCollector): def collect(_self, doc, score): self.assertEqual(1.0, score) def setNextReader(_self, context): pass def acceptsDocsOutOfOrder(_self): return True searcher.search(TermQuery(b), collector1()) bq = BooleanQuery() bq.add(TermQuery(a), BooleanClause.Occur.SHOULD) bq.add(TermQuery(b), BooleanClause.Occur.SHOULD) class collector2(PythonCollector): def collect(_self, doc, score): self.assertEqual(doc + _self.base + 1, score) def setNextReader(_self, context): _self.base = context.docBase def acceptsDocsOutOfOrder(_self): return True searcher.search(bq, collector2()) pq = PhraseQuery() pq.add(a) pq.add(c) class collector3(PythonCollector): def collect(_self, doc, score): self.assertEqual(1.0, score) def setNextReader(_self, context): pass def acceptsDocsOutOfOrder(_self): return True searcher.search(pq, collector3()) pq.setSlop(2) class collector4(PythonCollector): def collect(_self, doc, score): self.assertEqual(2.0, score) def setNextReader(_self, context): pass def acceptsDocsOutOfOrder(_self): return True searcher.search(pq, collector4()) if __name__ == "__main__": lucene.initVM(vmargs=['-Djava.awt.headless=true']) if '-loop' in sys.argv: sys.argv.remove('-loop') while True: try: unittest.main() except: pass else: unittest.main() pylucene-4.10.1-1/test/test_Sort.py000644 000765 000000 00000140756 12162654000 017332 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
pylucene-4.10.1-1/test/test_Sort.py000644 000765 000000 00000140756 12162654000 017322 0ustar00vajdawheel000000 000000 
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, lucene, unittest, math
from PyLuceneTestCase import PyLuceneTestCase

from itertools import izip
from random import randint

from java.lang import Byte, Double, Float, Integer, Long, Short
from java.util import BitSet
from java.util.concurrent import Executors, TimeUnit
from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.codecs import Codec
from org.apache.lucene.document import \
    Document, Field, FieldType, StringField, StoredField, TextField, \
    NumericDocValuesField, SortedDocValuesField, BinaryDocValuesField, \
    FloatDocValuesField
from org.apache.lucene.index import \
    FieldInfo, LogDocMergePolicy, MultiReader, Term
from org.apache.lucene.search import \
    BooleanQuery, BooleanClause, FieldCache, IndexSearcher, \
    MatchAllDocsQuery, Sort, SortField, TermQuery, TopFieldCollector
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import \
    BytesRef, DocIdBitSet, FieldCacheSanityChecker, NamedThreadFactory, \
    Version
from org.apache.pylucene.search import \
    PythonIntParser, PythonFloatParser, PythonLongParser, PythonDoubleParser, \
    PythonByteParser, PythonShortParser, \
    PythonFieldComparator, PythonFieldComparatorSource, PythonFilter

NUM_STRINGS = 750


class SortTestCase(PyLuceneTestCase):
    """
    Unit tests for sorting code, ported from Java Lucene
    """

    def __init__(self, *args, **kwds):
        super(SortTestCase, self).__init__(*args, **kwds)

        self.supportsDocValues = Codec.getDefault().getName() > "Lucene3x"
        self.data = [
            # tracer, contents, int, float, string, custom, i18n, long,
            # double, short, byte, 'custom parser encoding'
            ["A", "x a",           "5",           "4f",           "c", "A-3",   "p\u00EAche",      "10",            "-4.0",          "3",    "126", "J"],  # A, x
            ["B", "y a",           "5",           "3.4028235E38", "i", "B-10",  "HAT",             "1000000000",    "40.0",          "24",   "1",   "I"],  # B, y
            ["C", "x a b c",       "2147483647",  "1.0",          "j", "A-2",   "p\u00E9ch\u00E9", "99999999",      "40.00002343",   "125",  "15",  "H"],  # C, x
            ["D", "y a b c",       "-1",          "0.0f",         "a", "C-0",   "HUT",
             str(Long.MAX_VALUE), str(Double.MIN_VALUE), str(Short.MIN_VALUE), str(Byte.MIN_VALUE), "G"],  # D, y
            ["E", "x a b c d",     "5",           "2f",           "h", "B-8",   "peach",
             str(Long.MIN_VALUE), str(Double.MAX_VALUE), str(Short.MAX_VALUE), str(Byte.MAX_VALUE), "F"],  # E, x
            ["F", "y a b c d",     "2",           "3.14159f",     "g", "B-1",   "H\u00C5T",        "-44",           "343.034435444", "-3",   "0",   "E"],  # F, y
            ["G", "x a b c d",     "3",           "-1.0",         "f", "C-100", "sin",             "323254543543",  "4.043544",      "5",    "100", "D"],  # G, x
            ["H", "y a b c d",     "0",           "1.4E-45",      "e", "C-88",  "H\u00D8T",        "1023423423005", "4.043545",      "10",   "-50", "C"],  # H, y
            ["I", "x a b c d e f", "-2147483648", "1.0e+0",       "d", "A-10",  "s\u00EDn",        "332422459999",  "4.043546",      "-340", "51",  "B"],  # I, x
            ["J", "y a b c d e f", "4",           ".5",           "b", "C-7",   "HOT",             "34334543543",   "4.0000220343",  "300",  "2",   "A"],  # J, y
            ["W", "g",   "1", None,  None, None, None, None, None, None, None, None],
            ["X", "g",   "1", "0.1", None, None, None, None, None, None, None, None],
            ["Y", "g",   "1", "0.2", None, None, None, None, None, None, None, None],
            ["Z", "f g", None, None, None, None, None, None, None, None, None, None],
            # Sort Missing first/last
            ["a", "m", None, None, None, None, None, None, None, None, None, None],
            ["b", "m", "4", "4.0", "4", None, None, "4", "4", "4", "4", None],
            ["c", "m", "5", "5.0", "5", None, None, "5", "5", "5", "5", None],
            ["d", "m", None, None, None, None, None, None, None, None, None, None],
        ]
    def setUp(self):
        super(SortTestCase, self).setUp()

        self.dirs = []
        self.dvStringSorted = self.getRandomBoolean()
        # run the randomization at setup so that threads share it and we
        # don't hit cache incompatibilities
        self.notSorted = self.getRandomBoolean()
        # If you index as sorted source you can still sort by value instead:
        self.sortByValue = self.getRandomBoolean()

        self.full = self._getFullIndex()
        self.searchX = self._getXIndex()
        self.searchY = self._getYIndex()
        self.queryX = TermQuery(Term("contents", "x"))
        self.queryY = TermQuery(Term("contents", "y"))
        self.queryA = TermQuery(Term("contents", "a"))
        self.queryE = TermQuery(Term("contents", "e"))
        self.queryF = TermQuery(Term("contents", "f"))
        self.queryG = TermQuery(Term("contents", "g"))
        self.queryM = TermQuery(Term("contents", "m"))
        self.sort = Sort()

    def tearDown(self):
        for directory in self.dirs:
            directory.close()
        super(SortTestCase, self).tearDown()

    def _getIndex(self, even, odd):
        mergePolicy = LogDocMergePolicy()
        mergePolicy.setMergeFactor(1000)

        directory = RAMDirectory()
        self.dirs.append(directory)

        writer = self.getWriter(directory=directory,
                                analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT),
                                maxBufferedDocs=2, mergePolicy=mergePolicy)

        if self.dvStringSorted:
            # Index sorted
            stringDVType = FieldInfo.DocValuesType.SORTED
        elif self.notSorted:
            # Index non-sorted
            stringDVType = FieldInfo.DocValuesType.BINARY
        else:
            # sorted anyway
            stringDVType = FieldInfo.DocValuesType.SORTED

        ft1 = FieldType()
        ft1.setStored(True)
        ft2 = FieldType()
        ft2.setIndexed(True)

        for i in xrange(len(self.data)):
            if (i % 2 == 0 and even) or (i % 2 == 1 and odd):
                doc = Document()
                doc.add(Field("tracer", self.data[i][0], ft1))
                doc.add(TextField("contents", self.data[i][1], Field.Store.NO))
                if self.data[i][2] is not None:
                    doc.add(StringField("int", self.data[i][2], Field.Store.NO))
                    if self.supportsDocValues:
                        doc.add(NumericDocValuesField("int_dv", Long.parseLong(self.data[i][2])))
                if self.data[i][3] is not None:
                    doc.add(StringField("float", self.data[i][3], Field.Store.NO))
                    if self.supportsDocValues:
                        doc.add(FloatDocValuesField("float_dv", Float.parseFloat(self.data[i][3])))
                if self.data[i][4] is not None:
                    doc.add(StringField("string", self.data[i][4], Field.Store.NO))
                    if self.supportsDocValues:
                        if stringDVType == FieldInfo.DocValuesType.SORTED:
                            doc.add(SortedDocValuesField("string_dv", BytesRef(self.data[i][4])))
                        elif stringDVType == FieldInfo.DocValuesType.BINARY:
                            doc.add(BinaryDocValuesField("string_dv", BytesRef(self.data[i][4])))
                        else:
                            raise ValueError("unknown type " + stringDVType)
                if self.data[i][5] is not None:
                    doc.add(StringField("custom", self.data[i][5], Field.Store.NO))
                if self.data[i][6] is not None:
                    doc.add(StringField("i18n", self.data[i][6], Field.Store.NO))
                if self.data[i][7] is not None:
                    doc.add(StringField("long", self.data[i][7], Field.Store.NO))
                if self.data[i][8] is not None:
                    doc.add(StringField("double", self.data[i][8], Field.Store.NO))
                    if self.supportsDocValues:
                        doc.add(NumericDocValuesField("double_dv", Double.doubleToRawLongBits(Double.parseDouble(self.data[i][8]))))
                if self.data[i][9] is not None:
                    doc.add(StringField("short", self.data[i][9], Field.Store.NO))
                if self.data[i][10] is not None:
                    doc.add(StringField("byte", self.data[i][10], Field.Store.NO))
                if self.data[i][11] is not None:
                    doc.add(StringField("parser", self.data[i][11], Field.Store.NO))

                for f in doc.getFields():
                    if f.fieldType().indexed() and not f.fieldType().omitNorms():
                        Field.cast_(f).setBoost(2.0)

                writer.addDocument(doc)

        reader = writer.getReader()
        writer.close()

        return self.getSearcher(reader=reader)
    def _getFullIndex(self):
        return self._getIndex(True, True)

    def _getFullStrings(self):
        mergePolicy = LogDocMergePolicy()
        mergePolicy.setMergeFactor(97)

        directory = RAMDirectory()
        self.dirs.append(directory)

        writer = self.getWriter(directory=directory,
                                analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT),
                                maxBufferedDocs=4, mergePolicy=mergePolicy)

        onlyStored = FieldType()
        onlyStored.setStored(True)

        fixedLen = self.getRandomNumber(2, 8)
        fixedLen2 = self.getRandomNumber(1, 4)

        for i in xrange(NUM_STRINGS):
            doc = Document()

            num = self.getRandomCharString(self.getRandomNumber(2, 8), 48, 52)
            doc.add(Field("tracer", num, onlyStored))
            doc.add(StringField("string", num, Field.Store.NO))
            if self.supportsDocValues:
                if self.dvStringSorted:
                    doc.add(SortedDocValuesField("string_dv", BytesRef(num)))
                else:
                    doc.add(BinaryDocValuesField("string_dv", BytesRef(num)))

            num2 = self.getRandomCharString(self.getRandomNumber(1, 4), 48, 50)
            doc.add(StringField("string2", num2, Field.Store.NO))
            if self.supportsDocValues:
                if self.dvStringSorted:
                    doc.add(SortedDocValuesField("string2_dv", BytesRef(num2)))
                else:
                    doc.add(BinaryDocValuesField("string2_dv", BytesRef(num2)))
            doc.add(Field("tracer2", num2, onlyStored))

            for f2 in doc.getFields():
                if f2.fieldType().indexed() and not f2.fieldType().omitNorms():
                    Field.cast_(f2).setBoost(2.0)

            numFixed = self.getRandomCharString(fixedLen, 48, 52)
            doc.add(Field("fixed_tracer", numFixed, onlyStored))
            doc.add(StringField("string_fixed", numFixed, Field.Store.NO))
            if self.supportsDocValues:
                if self.dvStringSorted:
                    doc.add(SortedDocValuesField("string_fixed_dv", BytesRef(numFixed)))
                else:
                    doc.add(BinaryDocValuesField("string_fixed_dv", BytesRef(numFixed)))

            num2Fixed = self.getRandomCharString(fixedLen2, 48, 52)
            doc.add(StringField("string2_fixed", num2Fixed, Field.Store.NO))
            if self.supportsDocValues:
                if self.dvStringSorted:
                    doc.add(SortedDocValuesField("string2_fixed_dv", BytesRef(num2Fixed)))
                else:
                    doc.add(BinaryDocValuesField("string2_fixed_dv", BytesRef(num2Fixed)))
            doc.add(Field("tracer2_fixed", num2Fixed, onlyStored))

            for f2 in doc.getFields():
                if f2.fieldType().indexed() and not f2.fieldType().omitNorms():
                    Field.cast_(f2).setBoost(2.0)

            writer.addDocument(doc)

        writer.close()

        return self.getSearcher(directory=directory)

    def getRandomNumberString(self, num, low, high):
        # getRandomNumber() returns an int, stringify it before joining
        return ''.join([str(self.getRandomNumber(low, high))
                        for i in xrange(num)])

    def getRandomCharString(self, num, start=48, end=122):
        return ''.join([chr(self.getRandomNumber(start, end))
                        for i in xrange(num)])

    def getRandomNumber(self, low, high):
        return randint(low, high)

    def getRandomBoolean(self):
        return randint(0, 1) == 1

    def _getXIndex(self):
        return self._getIndex(True, False)

    def _getYIndex(self):
        return self._getIndex(False, True)

    def _getEmptyIndex(self):
        return self._getIndex(False, False)

    def testBuiltInSorts(self):
        """
        test the sorts by score and document number
        """
        sort = self.sort

        self._assertMatches(self.full, self.queryX, sort, "ACEGI")
        self._assertMatches(self.full, self.queryY, sort, "BDFHJ")

        sort.setSort(SortField.FIELD_DOC)
        self._assertMatches(self.full, self.queryX, sort, "ACEGI")
        self._assertMatches(self.full, self.queryY, sort, "BDFHJ")
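    # Worked example (added for clarity, not in the original source): for
    # the "x" documents the stored "int" values are A=5, C=2147483647, E=5,
    # G=3 and I=-2147483648, so the ascending SortField.Type.INT sort in
    # testTypedSort() below yields I, G, A, E (A before E via the secondary
    # FIELD_DOC sort, since both store 5), then C -- i.e. the expected
    # tracer string "IGAEC".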
    def testTypedSort(self):
        """
        test sorts where the type of field is specified
        """
        sort = self.sort

        sort.setSort([SortField("int", SortField.Type.INT), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryX, sort, "IGAEC")
        self._assertMatches(self.full, self.queryY, sort, "DHFJB")

        sort.setSort([SortField("float", SortField.Type.FLOAT), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryX, sort, "GCIEA")
        self._assertMatches(self.full, self.queryY, sort, "DHJFB")

        sort.setSort([SortField("long", SortField.Type.LONG), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryX, sort, "EACGI")
        self._assertMatches(self.full, self.queryY, sort, "FBJHD")

        sort.setSort([SortField("double", SortField.Type.DOUBLE), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryX, sort, "AGICE")
        self._assertMatches(self.full, self.queryY, sort, "DJHBF")

        sort.setSort([SortField("byte", SortField.Type.BYTE), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryX, sort, "CIGAE")
        self._assertMatches(self.full, self.queryY, sort, "DHFBJ")

        sort.setSort([SortField("short", SortField.Type.SHORT), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryX, sort, "IAGCE")
        self._assertMatches(self.full, self.queryY, sort, "DFHBJ")

        sort.setSort([SortField("string", SortField.Type.STRING), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryX, sort, "AIGEC")
        self._assertMatches(self.full, self.queryY, sort, "DJHFB")

        if self.supportsDocValues:
            sort.setSort([SortField("int_dv", SortField.Type.INT), SortField.FIELD_DOC])
            self._assertMatches(self.full, self.queryX, sort, "IGAEC")
            self._assertMatches(self.full, self.queryY, sort, "DHFJB")

            sort.setSort([SortField("float_dv", SortField.Type.FLOAT), SortField.FIELD_DOC])
            self._assertMatches(self.full, self.queryX, sort, "GCIEA")
            self._assertMatches(self.full, self.queryY, sort, "DHJFB")

            sort.setSort([SortField("double_dv", SortField.Type.DOUBLE), SortField.FIELD_DOC])
            self._assertMatches(self.full, self.queryX, sort, "AGICE")
            self._assertMatches(self.full, self.queryY, sort, "DJHBF")

            sort.setSort([SortField("string_dv", self._getDVStringSortType()), SortField.FIELD_DOC])
            self._assertMatches(self.full, self.queryX, sort, "AIGEC")
            self._assertMatches(self.full, self.queryY, sort, "DJHFB")

    def _getDVStringSortType(self, allowSorted=True):
        if self.dvStringSorted and allowSorted:
            if self.sortByValue:
                return SortField.Type.STRING_VAL
            else:
                return SortField.Type.STRING
        else:
            return SortField.Type.STRING_VAL

    def _verifyStringSort(self, sort):
        searcher = self._getFullStrings()
        result = searcher.search(MatchAllDocsQuery(), None,
                                 self.getRandomNumber(500, searcher.getIndexReader().maxDoc()),
                                 sort).scoreDocs

        buff = []
        n = len(result)
        last = None
        lastSub = None
        lastDocId = 0
        fail = False

        if "_fixed" in sort.getSort()[0].getField():
            fieldSuffix = "_fixed"
        else:
            fieldSuffix = ""

        for scoreDoc in result:
            doc2 = searcher.doc(scoreDoc.doc)
            v = doc2.getValues("tracer" + fieldSuffix)
            v2 = doc2.getValues("tracer2" + fieldSuffix)
            for _v, _v2 in izip(v, v2):
                buff.append(_v + "(" + _v2 + ")(" + str(scoreDoc.doc) + ")\n")

                if last is not None:
                    _cmp = cmp(_v, last)
                    if _cmp < 0:
                        # ensure first field is in order
                        fail = True
                        print "fail:", _v, "<", last
                        buff.append("  WRONG tracer\n")

                    if _cmp == 0:
                        # ensure second field is in reverse order
                        _cmp = cmp(_v2, lastSub)
                        if _cmp > 0:
                            fail = True
                            print "rev field fail:", _v2, ">", lastSub
                            buff.append("  WRONG tracer2\n")
                        elif _cmp == 0:
                            # ensure docid is in order
                            if scoreDoc.doc < lastDocId:
                                fail = True
                                print "doc fail:", scoreDoc.doc, ">", lastDocId
                                buff.append("  WRONG docID\n")

                last = _v
                lastSub = _v2
                lastDocId = scoreDoc.doc

        if fail:
            print "topn field1(field2)(docID):", ''.join(buff)

        self.assert_(not fail, "Found sort results out of order")
        searcher.getIndexReader().close()
    def testStringSort(self):
        """
        Test String sorting: small queue to many matches, multi field sort,
        reverse sort
        """
        sort = self.sort

        # Normal string field, var length
        sort.setSort([SortField("string", SortField.Type.STRING),
                      SortField("string2", SortField.Type.STRING, True),
                      SortField.FIELD_DOC])
        self._verifyStringSort(sort)

        # Normal string field, fixed length
        sort.setSort([SortField("string_fixed", SortField.Type.STRING),
                      SortField("string2_fixed", SortField.Type.STRING, True),
                      SortField.FIELD_DOC])
        self._verifyStringSort(sort)

        # Doc values field, var length
        self.assertTrue(self.supportsDocValues, "cannot work with preflex codec")
        sort.setSort([SortField("string_dv", self._getDVStringSortType()),
                      SortField("string2_dv", self._getDVStringSortType(), True),
                      SortField.FIELD_DOC])
        self._verifyStringSort(sort)

        # Doc values field, fixed length
        sort.setSort([SortField("string_fixed_dv", self._getDVStringSortType()),
                      SortField("string2_fixed_dv", self._getDVStringSortType(), True),
                      SortField.FIELD_DOC])
        self._verifyStringSort(sort)

    def testCustomFieldParserSort(self):
        """
        test sorts where the type of field is specified and a custom field
        parser is used, that uses a simple char encoding.  The sorted string
        contains a character beginning from 'A' that is mapped to a numeric
        value using some "funny" algorithm to be different for each data
        type.
        """
        # since tests explicitly use different parsers on the same field name
        # we explicitly check/purge the FieldCache between each assertMatch
        fc = FieldCache.DEFAULT

        class intParser(PythonIntParser):
            def parseInt(_self, val):
                return (val.bytes[val.offset] - ord('A')) * 123456
            def termsEnum(_self, terms):
                return terms.iterator(None)

        class floatParser(PythonFloatParser):
            def parseFloat(_self, val):
                return math.sqrt(val.bytes[val.offset])
            def termsEnum(_self, terms):
                return terms.iterator(None)

        class longParser(PythonLongParser):
            def parseLong(_self, val):
                return (val.bytes[val.offset] - ord('A')) * 1234567890L
            def termsEnum(_self, terms):
                return terms.iterator(None)

        class doubleParser(PythonDoubleParser):
            def parseDouble(_self, val):
                return math.pow(val.bytes[val.offset],
                                val.bytes[val.offset] - ord('A'))
            def termsEnum(_self, terms):
                return terms.iterator(None)

        class byteParser(PythonByteParser):
            def parseByte(_self, val):
                return chr(val.bytes[val.offset] - ord('A'))
            def termsEnum(_self, terms):
                return terms.iterator(None)

        class shortParser(PythonShortParser):
            def parseShort(_self, val):
                return val.bytes[val.offset] - ord('A')
            def termsEnum(_self, terms):
                return terms.iterator(None)
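        # Worked example of the "funny" char encoding (added for clarity,
        # not in the original source): each parser maps the single stored
        # character back to a number counted from 'A', e.g. intParser turns
        # "J" into (ord('J') - ord('A')) * 123456 == 9 * 123456 == 1111104.
        # Document A stores parser value "J" while document J stores "A"
        # (see self.data), so an ascending sort on any of these parsers
        # yields the tracer string "JIHGFEDCBA", as asserted below.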
        sort = self.sort

        sort.setSort([SortField("parser", intParser()), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
        self._assertSaneFieldCaches(self.getName() + " IntParser")
        fc.purgeAllCaches()

        sort.setSort([SortField("parser", floatParser()), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
        self._assertSaneFieldCaches(self.getName() + " FloatParser")
        fc.purgeAllCaches()

        sort.setSort([SortField("parser", longParser()), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
        self._assertSaneFieldCaches(self.getName() + " LongParser")
        fc.purgeAllCaches()

        sort.setSort([SortField("parser", doubleParser()), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
        self._assertSaneFieldCaches(self.getName() + " DoubleParser")
        fc.purgeAllCaches()

        sort.setSort([SortField("parser", byteParser()), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
        self._assertSaneFieldCaches(self.getName() + " ByteParser")
        fc.purgeAllCaches()

        sort.setSort([SortField("parser", shortParser()), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
        self._assertSaneFieldCaches(self.getName() + " ShortParser")
        fc.purgeAllCaches()

    def testEmptyIndex(self):
        """
        test sorts when there's nothing in the index
        """
        sort = self.sort
        empty = self._getEmptyIndex()

        self._assertMatches(empty, self.queryX, sort, "")

        sort.setSort(SortField.FIELD_DOC)
        self._assertMatches(empty, self.queryX, sort, "")

        sort.setSort([SortField("int", SortField.Type.INT), SortField.FIELD_DOC])
        self._assertMatches(empty, self.queryX, sort, "")

        sort.setSort([SortField("int_dv", SortField.Type.INT), SortField.FIELD_DOC])
        self._assertMatches(empty, self.queryX, sort, "")

        sort.setSort([SortField("string", SortField.Type.STRING, True), SortField.FIELD_DOC])
        self._assertMatches(empty, self.queryX, sort, "")

        sort.setSort([SortField("float", SortField.Type.FLOAT),
                      SortField("string", SortField.Type.STRING)])
        self._assertMatches(empty, self.queryX, sort, "")

        sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
                      SortField("string", SortField.Type.STRING)])
        self._assertMatches(empty, self.queryX, sort, "")

        sort.setSort([SortField("string_dv", self._getDVStringSortType(False), True),
                      SortField.FIELD_DOC])
        self._assertMatches(empty, self.queryX, sort, "")

        sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
                      SortField("string_dv", self._getDVStringSortType(False))])
        self._assertMatches(empty, self.queryX, sort, "")

        sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
                      SortField("string_dv", self._getDVStringSortType(False))])
        self._assertMatches(empty, self.queryX, sort, "")

    def testNewCustomFieldParserSort(self):
        """
        Test sorting w/ custom FieldComparator
        """
        sort = self.sort

        sort.setSort([SortField("parser", MyFieldComparatorSource())])
        self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")

    def testReverseSort(self):
        """
        test sorts in reverse
        """
        sort = self.sort

        sort.setSort([SortField(None, SortField.Type.SCORE, True), SortField.FIELD_DOC])
        self._assertMatches(self.full, self.queryX, sort, "IEGCA")
        self._assertMatches(self.full, self.queryY, sort, "JFHDB")

        sort.setSort(SortField(None, SortField.Type.DOC, True))
        self._assertMatches(self.full, self.queryX, sort, "IGECA")
        self._assertMatches(self.full, self.queryY, sort, "JHFDB")

        sort.setSort(SortField("int", SortField.Type.INT, True))
        self._assertMatches(self.full, self.queryX, sort, "CAEGI")
        self._assertMatches(self.full, self.queryY, sort, "BJFHD")

        sort.setSort(SortField("float", SortField.Type.FLOAT, True))
        self._assertMatches(self.full, self.queryX, sort, "AECIG")
        self._assertMatches(self.full, self.queryY, sort, "BFJHD")

        sort.setSort(SortField("string", SortField.Type.STRING, True))
        self._assertMatches(self.full, self.queryX, sort, "CEGIA")
        self._assertMatches(self.full, self.queryY, sort, "BFHJD")

        if self.supportsDocValues:
            sort.setSort(SortField("int_dv", SortField.Type.INT, True))
            self._assertMatches(self.full, self.queryX, sort, "CAEGI")
            self._assertMatches(self.full, self.queryY, sort, "BJFHD")

            sort.setSort(SortField("float_dv", SortField.Type.FLOAT, True))
            self._assertMatches(self.full, self.queryX, sort, "AECIG")
            self._assertMatches(self.full, self.queryY, sort, "BFJHD")

            sort.setSort(SortField("string_dv", self._getDVStringSortType(), True))
            self._assertMatches(self.full, self.queryX, sort, "CEGIA")
            self._assertMatches(self.full, self.queryY, sort, "BFHJD")
    def testEmptyFieldSort(self):
        """
        test sorting when the sort field is empty (undefined) for some of
        the documents
        """
        sort = self.sort

        sort.setSort(SortField("string", SortField.Type.STRING))
        self._assertMatches(self.full, self.queryF, sort, "ZJI")

        sort.setSort(SortField("string", SortField.Type.STRING, True))
        self._assertMatches(self.full, self.queryF, sort, "IJZ")

        sort.setSort(SortField("int", SortField.Type.INT))
        self._assertMatches(self.full, self.queryF, sort, "IZJ")

        sort.setSort(SortField("int", SortField.Type.INT, True))
        self._assertMatches(self.full, self.queryF, sort, "JZI")

        sort.setSort(SortField("float", SortField.Type.FLOAT))
        self._assertMatches(self.full, self.queryF, sort, "ZJI")

        # using a nonexisting field as first sort key shouldn't make a
        # difference:
        sort.setSort([SortField("nosuchfield", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT)])
        self._assertMatches(self.full, self.queryF, sort, "ZJI")

        sort.setSort(SortField("float", SortField.Type.FLOAT, True))
        self._assertMatches(self.full, self.queryF, sort, "IJZ")

        # When a field is None for both documents, the next SortField should
        # be used.
        # Works for
        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("string", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT)])
        self._assertMatches(self.full, self.queryG, sort, "ZWXY")

        # Reverse the last criterion to make sure the test didn't pass by
        # chance
        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("string", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT, True)])
        self._assertMatches(self.full, self.queryG, sort, "ZYXW")

        # Do the same for a ParallelMultiSearcher
        threadPool = Executors.newFixedThreadPool(self.getRandomNumber(2, 8),
                                                  NamedThreadFactory("testEmptyFieldSort"))
        parallelSearcher = IndexSearcher(self.full.getIndexReader(), threadPool)

        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("string", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT)])
        self._assertMatches(parallelSearcher, self.queryG, sort, "ZWXY")

        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("string", SortField.Type.STRING),
                      SortField("float", SortField.Type.FLOAT, True)])
        self._assertMatches(parallelSearcher, self.queryG, sort, "ZYXW")

        threadPool.shutdown()
        threadPool.awaitTermination(1000L, TimeUnit.MILLISECONDS)

    def testSortCombos(self):
        """
        test sorts using a series of fields
        """
        sort = self.sort

        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("float", SortField.Type.FLOAT)])
        self._assertMatches(self.full, self.queryX, sort, "IGEAC")

        sort.setSort([SortField("int", SortField.Type.INT, True),
                      SortField(None, SortField.Type.DOC, True)])
        self._assertMatches(self.full, self.queryX, sort, "CEAGI")

        sort.setSort([SortField("float", SortField.Type.FLOAT),
                      SortField("string", SortField.Type.STRING)])
        self._assertMatches(self.full, self.queryX, sort, "GICEA")

        if self.supportsDocValues:
            sort.setSort([SortField("int_dv", SortField.Type.INT),
                          SortField("float_dv", SortField.Type.FLOAT)])
            self._assertMatches(self.full, self.queryX, sort, "IGEAC")

            sort.setSort([SortField("int_dv", SortField.Type.INT, True),
                          SortField(None, SortField.Type.DOC, True)])
            self._assertMatches(self.full, self.queryX, sort, "CEAGI")

            sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
                          SortField("string_dv", self._getDVStringSortType())])
            self._assertMatches(self.full, self.queryX, sort, "GICEA")
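    # Note on the TopFieldCollector.create() calls in the tests below
    # (added for clarity, not in the original source): the four booleans
    # following the numHits argument are, in order, fillFields,
    # trackDocScores, trackMaxScore and docsScoredInOrder.  For example,
    # create(sort, 10, True, True, False, True) fills sort fields and
    # per-doc scores but leaves maxScore untracked (NaN), which is exactly
    # what testSortWithScoreNoMaxScoreTracking() asserts.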
    def testParallelMultiSort(self):
        """
        test a variety of sorts using a parallel multisearcher
        """
        threadPool = Executors.newFixedThreadPool(self.getRandomNumber(2, 8),
                                                  NamedThreadFactory("testParallelMultiSort"))
        searcher = IndexSearcher(MultiReader([self.searchX.getIndexReader(),
                                              self.searchY.getIndexReader()]),
                                 threadPool)

        self._runMultiSorts(searcher, False)

        threadPool.shutdown()
        threadPool.awaitTermination(1000L, TimeUnit.MILLISECONDS)

    def testTopDocsScores(self):
        """
        There was previously a bug in FieldSortedHitQueue.maxscore when only
        a single doc was added.  That is what the following tests for.
        """
        sort = Sort()
        nDocs = 10

        # try to pick a query that will result in an unnormalized
        # score greater than 1 to test for correct normalization
        docs1 = self.full.search(self.queryE, None, nDocs, sort, True, True)

        # a filter that only allows through the first hit
        class filter(PythonFilter):
            def getDocIdSet(_self, context, acceptDocs):
                reader = context.reader()
                bs = BitSet(reader.maxDoc())
                bs.set(0, reader.maxDoc())
                bs.set(docs1.scoreDocs[0].doc)
                return DocIdBitSet(bs)

        docs2 = self.full.search(self.queryE, filter(), nDocs, sort, True, True)

        self.assertAlmostEqual(docs1.scoreDocs[0].score,
                               docs2.scoreDocs[0].score, delta=1e-6)

    def testSortWithoutFillFields(self):
        """
        There was previously a bug in TopFieldCollector when fillFields was
        set to False - the same doc and score was set in ScoreDoc[] array.
        This test asserts that if fillFields is False, the documents are set
        properly.  It does not use Searcher's default search methods (with
        Sort) since all set fillFields to True.
        """
        sorts = [Sort(SortField.FIELD_DOC), Sort()]
        for sort in sorts:
            q = MatchAllDocsQuery()
            tdc = TopFieldCollector.create(sort, 10, False, False, False, True)

            self.full.search(q, tdc)

            sds = tdc.topDocs().scoreDocs
            for i in xrange(1, len(sds)):
                self.assert_(sds[i].doc != sds[i - 1].doc)

    def testSortWithoutScoreTracking(self):
        """
        Two Sort criteria to instantiate the multi/single comparators.
        """
        sorts = [Sort(SortField.FIELD_DOC), Sort()]
        for sort in sorts:
            q = MatchAllDocsQuery()
            tdc = TopFieldCollector.create(sort, 10, True, False, False, True)

            self.full.search(q, tdc)

            tds = tdc.topDocs()
            sds = tds.scoreDocs
            for sd in sds:
                self.assert_(Float.isNaN_(sd.score))

            self.assert_(Float.isNaN_(tds.getMaxScore()))

    def testSortWithScoreNoMaxScoreTracking(self):
        """
        Two Sort criteria to instantiate the multi/single comparators.
        """
        sorts = [Sort(SortField.FIELD_DOC), Sort()]
        for sort in sorts:
            q = MatchAllDocsQuery()
            tdc = TopFieldCollector.create(sort, 10, True, True, False, True)

            self.full.search(q, tdc)

            tds = tdc.topDocs()
            sds = tds.scoreDocs
            for sd in sds:
                self.assert_(not Float.isNaN_(sd.score))

            self.assert_(Float.isNaN_(tds.getMaxScore()))

    def testSortWithScoreAndMaxScoreTracking(self):
        """
        Two Sort criteria to instantiate the multi/single comparators.
        """
        sorts = [Sort(SortField.FIELD_DOC), Sort()]
        for sort in sorts:
            q = MatchAllDocsQuery()
            tdc = TopFieldCollector.create(sort, 10, True, True, True, True)

            self.full.search(q, tdc)

            tds = tdc.topDocs()
            sds = tds.scoreDocs
            for sd in sds:
                self.assert_(not Float.isNaN_(sd.score))

            self.assert_(not Float.isNaN_(tds.getMaxScore()))

    def testOutOfOrderDocsScoringSort(self):
        """
        Two Sort criteria to instantiate the multi/single comparators.
        """
        sorts = [Sort(SortField.FIELD_DOC), Sort()]

        tfcOptions = [[False, False, False],
                      [False, False, True],
                      [False, True, False],
                      [False, True, True],
                      [True, False, False],
                      [True, False, True],
                      [True, True, False],
                      [True, True, True]]

        actualTFCClasses = [
            "OutOfOrderOneComparatorNonScoringCollector",
            "OutOfOrderOneComparatorScoringMaxScoreCollector",
            "OutOfOrderOneComparatorScoringNoMaxScoreCollector",
            "OutOfOrderOneComparatorScoringMaxScoreCollector",
            "OutOfOrderOneComparatorNonScoringCollector",
            "OutOfOrderOneComparatorScoringMaxScoreCollector",
            "OutOfOrderOneComparatorScoringNoMaxScoreCollector",
            "OutOfOrderOneComparatorScoringMaxScoreCollector"
        ]

        bq = BooleanQuery()
        # Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2
        # which delegates to BS if there are no mandatory clauses.
        bq.add(MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
        # Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to
        # return the clause instead of BQ.
        bq.setMinimumNumberShouldMatch(1)

        for sort in sorts:
            for tfcOption, actualTFCClass in izip(tfcOptions, actualTFCClasses):
                tdc = TopFieldCollector.create(sort, 10, tfcOption[0],
                                               tfcOption[1], tfcOption[2],
                                               False)
                self.assert_(tdc.getClass().getName().endswith("$" + actualTFCClass))

                self.full.search(bq, tdc)

                tds = tdc.topDocs()
                sds = tds.scoreDocs
                self.assertEqual(10, len(sds))

    def testSortWithScoreAndMaxScoreTrackingNoResults(self):
        """
        Two Sort criteria to instantiate the multi/single comparators.
        """
        sorts = [Sort(SortField.FIELD_DOC), Sort()]
        for sort in sorts:
            tdc = TopFieldCollector.create(sort, 10, True, True, True, True)
            tds = tdc.topDocs()
            self.assertEqual(0, tds.totalHits)
            self.assert_(Float.isNaN_(tds.getMaxScore()))

    def _runMultiSorts(self, multi, isFull):
        """
        runs a variety of sorts useful for multisearchers
        """
        sort = self.sort

        sort.setSort(SortField.FIELD_DOC)
        expected = isFull and "ABCDEFGHIJ" or "ACEGIBDFHJ"
        self._assertMatches(multi, self.queryA, sort, expected)

        sort.setSort(SortField("int", SortField.Type.INT))
        expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
        self._assertMatches(multi, self.queryA, sort, expected)

        sort.setSort([SortField("int", SortField.Type.INT), SortField.FIELD_DOC])
        expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
        self._assertMatches(multi, self.queryA, sort, expected)

        sort.setSort(SortField("int", SortField.Type.INT))
        expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
        self._assertMatches(multi, self.queryA, sort, expected)

        sort.setSort([SortField("float", SortField.Type.FLOAT), SortField.FIELD_DOC])
        self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")

        sort.setSort(SortField("float", SortField.Type.FLOAT))
        self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")

        sort.setSort(SortField("string", SortField.Type.STRING))
        self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")

        sort.setSort(SortField("int", SortField.Type.INT, True))
        expected = isFull and "CABEJGFHDI" or "CAEBJGFHDI"
        self._assertMatches(multi, self.queryA, sort, expected)

        sort.setSort(SortField("float", SortField.Type.FLOAT, True))
        self._assertMatches(multi, self.queryA, sort, "BAFECIJHDG")

        sort.setSort(SortField("string", SortField.Type.STRING, True))
        self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")

        sort.setSort([SortField("int", SortField.Type.INT),
                      SortField("float", SortField.Type.FLOAT)])
        self._assertMatches(multi, self.queryA, sort, "IDHFGJEABC")

        sort.setSort([SortField("float", SortField.Type.FLOAT),
                      SortField("string", SortField.Type.STRING)])
        self._assertMatches(multi, self.queryA, sort, "GDHJICEFAB")
        sort.setSort(SortField("int", SortField.Type.INT))
        self._assertMatches(multi, self.queryF, sort, "IZJ")

        sort.setSort(SortField("int", SortField.Type.INT, True))
        self._assertMatches(multi, self.queryF, sort, "JZI")

        sort.setSort(SortField("float", SortField.Type.FLOAT))
        self._assertMatches(multi, self.queryF, sort, "ZJI")

        sort.setSort(SortField("string", SortField.Type.STRING))
        self._assertMatches(multi, self.queryF, sort, "ZJI")

        sort.setSort(SortField("string", SortField.Type.STRING, True))
        self._assertMatches(multi, self.queryF, sort, "IJZ")

        if self.supportsDocValues:
            sort.setSort(SortField("int_dv", SortField.Type.INT))
            expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
            self._assertMatches(multi, self.queryA, sort, expected)

            sort.setSort([SortField("int_dv", SortField.Type.INT), SortField.FIELD_DOC])
            expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
            self._assertMatches(multi, self.queryA, sort, expected)

            sort.setSort(SortField("int_dv", SortField.Type.INT))
            expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
            self._assertMatches(multi, self.queryA, sort, expected)

            sort.setSort([SortField("float_dv", SortField.Type.FLOAT), SortField.FIELD_DOC])
            self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")

            sort.setSort(SortField("float_dv", SortField.Type.FLOAT))
            self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")

            sort.setSort(SortField("int_dv", SortField.Type.INT, True))
            expected = isFull and "CABEJGFHDI" or "CAEBJGFHDI"
            self._assertMatches(multi, self.queryA, sort, expected)

            sort.setSort([SortField("int_dv", SortField.Type.INT),
                          SortField("float_dv", SortField.Type.FLOAT)])
            self._assertMatches(multi, self.queryA, sort, "IDHFGJEABC")

            sort.setSort(SortField("int_dv", SortField.Type.INT))
            self._assertMatches(multi, self.queryF, sort, "IZJ")

            sort.setSort(SortField("int_dv", SortField.Type.INT, True))
            self._assertMatches(multi, self.queryF, sort, "JZI")

            sort.setSort(SortField("string_dv", self._getDVStringSortType()))
            self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")

            sort.setSort(SortField("string_dv", self._getDVStringSortType(), True))
            self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")

            sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
                          SortField("string_dv", self._getDVStringSortType())])
            self._assertMatches(multi, self.queryA, sort, "GDHJICEFAB")

            sort.setSort(SortField("string_dv", self._getDVStringSortType()))
            self._assertMatches(multi, self.queryF, sort, "ZJI")

            sort.setSort(SortField("string_dv", self._getDVStringSortType(), True))
            self._assertMatches(multi, self.queryF, sort, "IJZ")

        # up to this point, all of the searches should have "sane"
        # FieldCache behavior, and should have reused the cache in several
        # cases
        self._assertSaneFieldCaches(self.getName() + " various")
        FieldCache.DEFAULT.purgeAllCaches()

    def _assertMatches(self, searcher, query, sort, expectedResult):
        """
        make sure the documents returned by the search match the expected
        list
        """
        # ScoreDoc[] result = searcher.search(query, None, 1000, sort).scoreDocs
        hits = searcher.search(query, None, len(expectedResult) or 1, sort)
        sds = hits.scoreDocs

        self.assertEqual(hits.totalHits, len(expectedResult))
        buff = []
        for sd in sds:
            doc = searcher.doc(sd.doc)
            v = doc.getValues("tracer")
            for _v in v:
                buff.append(_v)

        self.assertEqual(expectedResult, ''.join(buff))

    def getScores(self, hits, searcher):
        scoreMap = {}
        for hit in hits:
            doc = searcher.doc(hit.doc)
            v = doc.getValues("tracer")
            self.assertEqual(len(v), 1)
            scoreMap[v[0]] = hit.score

        return scoreMap
    def _assertSameValues(self, m1, m2):
        """
        make sure all the values in the maps match
        """
        self.assertEquals(len(m1), len(m2))
        for key in m1.iterkeys():
            self.assertAlmostEqual(m1[key], m2[key], delta=1e-6)

    def getName(self):
        return type(self).__name__

    def _assertSaneFieldCaches(self, msg):
        entries = FieldCache.DEFAULT.getCacheEntries()
        insanity = FieldCacheSanityChecker.checkSanity(entries)
        if insanity:
            print [x for x in insanity]
        self.assertEqual(0, len(insanity),
                         msg + ": Insane FieldCache usage(s) found")


class MyFieldComparator(PythonFieldComparator):

    def __init__(self, numHits):
        super(MyFieldComparator, self).__init__()
        self.slotValues = [0] * numHits

    def copy(self, slot, doc):
        self.slotValues[slot] = self.docValues.get(doc)

    def compare(self, slot1, slot2):
        return self.slotValues[slot1] - self.slotValues[slot2]

    def compareBottom(self, doc):
        return self.bottomValue - self.docValues.get(doc)

    def setBottom(self, bottom):
        self.bottomValue = self.slotValues[bottom]

    def setNextReader(self, context):

        class intParser(PythonIntParser):
            def parseInt(_self, val):
                return (val.bytes[val.offset] - ord('A')) * 123456
            def termsEnum(_self, terms):
                return terms.iterator(None)

        self.docValues = FieldCache.DEFAULT.getInts(context.reader(), "parser",
                                                    intParser(), False)
        return self

    def value(self, slot):
        return Integer(self.slotValues[slot])

    def compareDocToValue(self, doc, valueObj):
        value = valueObj.intValue()
        docValue = self.docValues.get(doc)

        # values are small enough that overflow won't happen
        return docValue - value


class MyFieldComparatorSource(PythonFieldComparatorSource):

    def newComparator(self, fieldname, numHits, sortPos, reversed):
        # keep an extra ref since this object seems to be passed around
        # back and forth without a reference being kept on the java side
        self.saved = MyFieldComparator(numHits)
        return self.saved


if __name__ == "__main__":
    env = lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except:
                pass
#            refs = sorted(env._dumpRefs(classes=True).items(),
#                          key=lambda x: x[1], reverse=True)
#            print refs[0:4]
    else:
        unittest.main()

pylucene-4.10.1-1/test/test_StopAnalyzer.py000644 000765 000000 00000007076 12162654000 021033 0ustar00vajdawheel000000 000000 
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, lucene, unittest
from java.io import StringReader

from org.apache.lucene.analysis.core import StopAnalyzer, StopFilter
from org.apache.lucene.analysis.tokenattributes import \
    CharTermAttribute, PositionIncrementAttribute
from org.apache.lucene.util import Version


class StopAnalyzerTestCase(unittest.TestCase):
    """
    Unit tests ported from Java Lucene
    """

    def setUp(self):
        self.stop = StopAnalyzer(Version.LUCENE_CURRENT)
        self.invalidTokens = StopAnalyzer.ENGLISH_STOP_WORDS_SET

    def testDefaults(self):
        self.assert_(self.stop is not None)

        reader = StringReader("This is a test of the english stop analyzer")
        stream = self.stop.tokenStream("test", reader)
        self.assert_(stream is not None)
        stream.reset()

        termAtt = stream.getAttribute(CharTermAttribute.class_)
        while stream.incrementToken():
            self.assert_(termAtt.toString() not in self.invalidTokens)

    def testStopList(self):
        stopWords = ["good", "test", "analyzer"]
        stopWordsSet = StopFilter.makeStopSet(Version.LUCENE_CURRENT, stopWords)

        newStop = StopAnalyzer(Version.LUCENE_40, stopWordsSet)
        reader = StringReader("This is a good test of the english stop analyzer")
        stream = newStop.tokenStream("test", reader)
        self.assert_(stream is not None)
        stream.reset()

        termAtt = stream.getAttribute(CharTermAttribute.class_)
        while stream.incrementToken():
            text = termAtt.toString()
            self.assert_(text not in stopWordsSet)

    def testStopListPositions(self):
        stopWords = ["good", "test", "analyzer"]
        stopWordsSet = StopFilter.makeStopSet(Version.LUCENE_CURRENT, stopWords)

        newStop = StopAnalyzer(Version.LUCENE_CURRENT, stopWordsSet)
        reader = StringReader("This is a good test of the english stop analyzer with positions")
        expectedIncr = [1, 1, 1, 3, 1, 1, 1, 2, 1]
        stream = newStop.tokenStream("test", reader)
        self.assert_(stream is not None)
        stream.reset()

        i = 0
        termAtt = stream.getAttribute(CharTermAttribute.class_)
        posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class_)
        while stream.incrementToken():
            text = termAtt.toString()
            self.assert_(text not in stopWordsSet)
            self.assertEqual(expectedIncr[i],
                             posIncrAtt.getPositionIncrement())
            i += 1


if __name__ == "__main__":
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except:
                pass
    else:
        unittest.main()
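# Illustrative sketch (not part of the original test): the TokenStream
# consumption pattern used above, factored into one helper.  Later Lucene
# versions also expect end() and close() to be called after iteration;
# 'collect_terms' is a hypothetical name, not used by the tests.
#
#     def collect_terms(analyzer, text):
#         stream = analyzer.tokenStream("test", StringReader(text))
#         stream.reset()
#         termAtt = stream.getAttribute(CharTermAttribute.class_)
#         terms = []
#         while stream.incrementToken():
#             terms.append(termAtt.toString())
#         stream.end()
#         stream.close()
#         return terms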
pylucene-4.10.1-1/test/test_StopWords.py000644 000765 000000 00000003461 12162654000 020336 0ustar00vajdawheel000000 000000 
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, lucene, unittest
from java.io import StringReader

from org.apache.lucene.analysis.core import StopFilter
from org.apache.lucene.analysis.standard import StandardTokenizer
from org.apache.lucene.util import Version

# run with -loop to test fix for string local ref leak reported
# by Aaron Lav.


class StopWordsTestCase(unittest.TestCase):

    def setUp(self):
        stopWords = ['the', 'and', 's']
        self.stop_set = StopFilter.makeStopSet(Version.LUCENE_CURRENT, stopWords)
        self.reader = StringReader('foo')

    def testStopWords(self):
        try:
            result = StandardTokenizer(Version.LUCENE_CURRENT, self.reader)
            result = StopFilter(Version.LUCENE_CURRENT, result, self.stop_set)
        except Exception, e:
            self.fail(str(e))


if __name__ == "__main__":
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except:
                pass
    else:
        unittest.main()

pylucene-4.10.1-1/test/test_TermRangeFilter.py000644 000765 000000 00000020112 12162654000 021416 0ustar00vajdawheel000000 000000 
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, lucene, unittest
from BaseTestRangeFilter import BaseTestRangeFilter

from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.document import Document, Field, StringField
from org.apache.lucene.index import Term
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.search import TermQuery, TermRangeFilter

#
# A basic 'positive' Unit test class for the TermRangeFilter class.
#
# NOTE: at the moment, this class only tests for 'positive' results,
# it does not verify the results to ensure there are no 'false positives',
# nor does it adequately test 'negative' results.  It also does not test
# that garbage in results in an Exception.
#
def _trf(*args):
    return TermRangeFilter.newStringRange(*args)


class TestTermRangeFilter(BaseTestRangeFilter):

    def testRangeFilterId(self):
        index = self.signedIndex
        reader = self.getReader(directory=index.index)
        search = self.getSearcher(reader=reader)

        medId = ((self.maxId - self.minId) / 2)

        minIP = self.pad(self.minId)
        maxIP = self.pad(self.maxId)
        medIP = self.pad(medId)

        numDocs = reader.numDocs()

        self.assertEqual(numDocs, 1 + self.maxId - self.minId, "num of docs")

        q = TermQuery(Term("body", "body"))

        # test id, bounded on both ends
        result = search.search(q, _trf("id", minIP, maxIP, True, True), 50)
        self.assertEqual(numDocs, result.totalHits, "find all")

        result = search.search(q, _trf("id", minIP, maxIP, True, False), 50)
        self.assertEqual(numDocs - 1, result.totalHits, "all but last")

        result = search.search(q, _trf("id", minIP, maxIP, False, True), 50)
        self.assertEqual(numDocs - 1, result.totalHits, "all but first")

        result = search.search(q, _trf("id", minIP, maxIP, False, False), 50)
        self.assertEqual(numDocs - 2, result.totalHits, "all but ends")

        result = search.search(q, _trf("id", medIP, maxIP, True, True), 50)
        self.assertEqual(1 + self.maxId - medId, result.totalHits, "med and up")

        result = search.search(q, _trf("id", minIP, medIP, True, True), 50)
        self.assertEqual(1 + medId - self.minId, result.totalHits, "up to med")

        # unbounded id
        result = search.search(q, _trf("id", minIP, None, True, False), 50)
        self.assertEqual(numDocs, result.totalHits, "min and up")

        result = search.search(q, _trf("id", None, maxIP, False, True), 50)
        self.assertEqual(numDocs, result.totalHits, "max and down")

        result = search.search(q, _trf("id", minIP, None, False, False), 50)
        self.assertEqual(numDocs - 1, result.totalHits, "not min, but up")

        result = search.search(q, _trf("id", None, maxIP, False, False), 50)
        self.assertEqual(numDocs - 1, result.totalHits, "not max, but down")

        result = search.search(q, _trf("id", medIP, maxIP, True, False), 50)
        self.assertEqual(self.maxId - medId, result.totalHits, "med and up, not max")

        result = search.search(q, _trf("id", minIP, medIP, False, True), 50)
        self.assertEqual(medId - self.minId, result.totalHits, "not min, up to med")

        # very small sets
        result = search.search(q, _trf("id", minIP, minIP, False, False), 50)
        self.assertEqual(0, result.totalHits, "min, min, False, False")

        result = search.search(q, _trf("id", medIP, medIP, False, False), 50)
        self.assertEqual(0, result.totalHits, "med, med, False, False")

        result = search.search(q, _trf("id", maxIP, maxIP, False, False), 50)
        self.assertEqual(0, result.totalHits, "max, max, False, False")

        result = search.search(q, _trf("id", minIP, minIP, True, True), 50)
        self.assertEqual(1, result.totalHits, "min, min, True, True")

        result = search.search(q, _trf("id", None, minIP, False, True), 50)
        self.assertEqual(1, result.totalHits, "nul, min, False, True")

        result = search.search(q, _trf("id", maxIP, maxIP, True, True), 50)
        self.assertEqual(1, result.totalHits, "max, max, True, True")

        result = search.search(q, _trf("id", maxIP, None, True, False), 50)
        self.assertEqual(1, result.totalHits, "max, nul, True, True")

        result = search.search(q, _trf("id", medIP, medIP, True, True), 50)
        self.assertEqual(1, result.totalHits, "med, med, True, True")

    def testRangeFilterRand(self):
        index = self.signedIndex
        reader = self.getReader(directory=index.index)
        search = self.getSearcher(reader=reader)

        minRP = self.pad(index.minR)
        maxRP = self.pad(index.maxR)

        numDocs = reader.numDocs()

        self.assertEqual(numDocs, 1 + self.maxId - self.minId, "num of docs")

        q = TermQuery(Term("body", "body"))
        # test extremes, bounded on both ends
        result = search.search(q, _trf("rand", minRP, maxRP, True, True), 50)
        self.assertEqual(numDocs, result.totalHits, "find all")

        result = search.search(q, _trf("rand", minRP, maxRP, True, False), 50)
        self.assertEqual(numDocs - 1, result.totalHits, "all but biggest")

        result = search.search(q, _trf("rand", minRP, maxRP, False, True), 50)
        self.assertEqual(numDocs - 1, result.totalHits, "all but smallest")

        result = search.search(q, _trf("rand", minRP, maxRP, False, False), 50)
        self.assertEqual(numDocs - 2, result.totalHits, "all but extremes")

        # unbounded
        result = search.search(q, _trf("rand", minRP, None, True, False), 50)
        self.assertEqual(numDocs, result.totalHits, "smallest and up")

        result = search.search(q, _trf("rand", None, maxRP, False, True), 50)
        self.assertEqual(numDocs, result.totalHits, "biggest and down")

        result = search.search(q, _trf("rand", minRP, None, False, False), 50)
        self.assertEqual(numDocs - 1, result.totalHits, "not smallest, but up")

        result = search.search(q, _trf("rand", None, maxRP, False, False), 50)
        self.assertEqual(numDocs - 1, result.totalHits, "not biggest, but down")

        # very small sets
        result = search.search(q, _trf("rand", minRP, minRP, False, False), 50)
        self.assertEqual(0, result.totalHits, "min, min, False, False")

        result = search.search(q, _trf("rand", maxRP, maxRP, False, False), 50)
        self.assertEqual(0, result.totalHits, "max, max, False, False")

        result = search.search(q, _trf("rand", minRP, minRP, True, True), 50)
        self.assertEqual(1, result.totalHits, "min, min, True, True")

        result = search.search(q, _trf("rand", None, minRP, False, True), 50)
        self.assertEqual(1, result.totalHits, "nul, min, False, True")

        result = search.search(q, _trf("rand", maxRP, maxRP, True, True), 50)
        self.assertEqual(1, result.totalHits, "max, max, True, True")

        result = search.search(q, _trf("rand", maxRP, None, True, False), 50)
        self.assertEqual(1, result.totalHits, "max, nul, True, True")


if __name__ == "__main__":
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except:
                pass
    else:
        unittest.main()
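# Summary of the TermRangeFilter.newStringRange() flags exercised above
# (added for clarity, not in the original source): the two booleans are
# includeLower and includeUpper, so with bounds (min, max):
#
#     _trf("id", minIP, maxIP, True,  True)    ->  [min, max]   find all
#     _trf("id", minIP, maxIP, True,  False)   ->  [min, max)   all but last
#     _trf("id", minIP, maxIP, False, True)    ->  (min, max]   all but first
#     _trf("id", minIP, None,  True,  False)   ->  [min, ...)   unbounded above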
pylucene-4.10.1-1/test/test_TermRangeQuery.py000644 000765 000000 00000007402 12162654000 021303 0ustar00vajdawheel000000 000000 
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase

from org.apache.lucene.document import Document, Field, StringField, TextField
from org.apache.lucene.index import IndexWriterConfig
from org.apache.lucene.search import TermRangeQuery


class TermRangeQueryTestCase(PyLuceneTestCase):
    """
    Unit tests ported from Java Lucene
    """

    def setUp(self):
        super(TermRangeQueryTestCase, self).setUp()
        self.docCount = 0

    def _initializeIndex(self, values):
        writer = self.getWriter()
        for value in values:
            self._insertDoc(writer, value)
        writer.close()

    def _insertDoc(self, writer, content):
        doc = Document()

        doc.add(Field("id", "id" + str(self.docCount), StringField.TYPE_STORED))
        doc.add(Field("content", content, TextField.TYPE_NOT_STORED))

        writer.addDocument(doc)
        self.docCount += 1

    def _addDoc(self, content):
        writer = self.getWriter(open_mode=IndexWriterConfig.OpenMode.APPEND)
        self._insertDoc(writer, content)
        writer.close()

    def testExclusive(self):
        query = TermRangeQuery.newStringRange("content", "A", "C", False, False)

        self._initializeIndex(["A", "B", "C", "D"])
        searcher = self.getSearcher()
        topDocs = searcher.search(query, 50)
        self.assertEqual(1, topDocs.totalHits, "A,B,C,D, only B in range")
        del searcher

        self._initializeIndex(["A", "B", "D"])
        searcher = self.getSearcher()
        topDocs = searcher.search(query, 50)
        self.assertEqual(1, topDocs.totalHits, "A,B,D, only B in range")
        del searcher

        self._addDoc("C")
        searcher = self.getSearcher()
        topDocs = searcher.search(query, 50)
        self.assertEqual(1, topDocs.totalHits, "C added, still only B in range")
        del searcher

    def testInclusive(self):
        query = TermRangeQuery.newStringRange("content", "A", "C", True, True)

        self._initializeIndex(["A", "B", "C", "D"])
        searcher = self.getSearcher()
        topDocs = searcher.search(query, 50)
        self.assertEqual(3, topDocs.totalHits, "A,B,C,D - A,B,C in range")
        del searcher

        self._initializeIndex(["A", "B", "D"])
        searcher = self.getSearcher()
        topDocs = searcher.search(query, 50)
        self.assertEqual(2, topDocs.totalHits, "A,B,D - A and B in range")
        del searcher

        self._addDoc("C")
        searcher = self.getSearcher()
        topDocs = searcher.search(query, 50)
        self.assertEqual(3, topDocs.totalHits, "C added - A, B, C in range")
        del searcher


if __name__ == "__main__":
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    if '-loop' in sys.argv:
        sys.argv.remove('-loop')
        while True:
            try:
                unittest.main()
            except:
                pass
    else:
        unittest.main()
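# Note (added for clarity, not in the original source): the 'del searcher'
# statements above drop the Python reference to each searcher before the
# index is rebuilt; with JCC this releases the wrapped Java object so the
# underlying reader can be reclaimed before getSearcher() opens a new one
# over the re-initialized index.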
pylucene-4.10.1-1/test/test_ThaiAnalyzer.py000644 000765 000000 00000015265 12162654000 020772 0ustar00vajdawheel000000 000000 
# -*- coding: utf-8 -*-
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================

import sys, lucene, unittest
from BaseTokenStreamTestCase import BaseTokenStreamTestCase

from java.io import StringReader
from org.apache.lucene.analysis.th import ThaiAnalyzer, ThaiWordFilter
from org.apache.lucene.analysis.util import CharArraySet
from org.apache.lucene.util import Version


class ThaiAnalyzerTestCase(BaseTokenStreamTestCase):

    def testOffsets(self):
        self.assert_(ThaiWordFilter.DBBI_AVAILABLE,
                     "JRE does not support Thai dictionary-based BreakIterator")
        self._assertAnalyzesTo(ThaiAnalyzer(Version.LUCENE_CURRENT,
                                            CharArraySet.EMPTY_SET),
                               u"การที่ได้ต้องแสดงว่างานดี",
                               [ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
                                 u"ว่า", u"งาน", u"ดี" ],
                               [ 0, 3, 6, 9, 13, 17, 20, 23 ],
                               [ 3, 6, 9, 13, 17, 20, 23, 25 ])

    def testTokenType(self):
        self.assert_(ThaiWordFilter.DBBI_AVAILABLE,
                     "JRE does not support Thai dictionary-based BreakIterator")
        self._assertAnalyzesTo(ThaiAnalyzer(Version.LUCENE_35),
                               u"การที่ได้ต้องแสดงว่างานดี ๑๒๓",
                               [ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
                                 u"ว่า", u"งาน", u"ดี", u"๑๒๓" ],
                               None, None,
                               [ "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
                                 "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
                                 "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
                                 "<SOUTHEAST_ASIAN>", "<SOUTHEAST_ASIAN>",
                                 "<NUM>" ])

    def testPositionIncrements(self):
        self.assert_(ThaiWordFilter.DBBI_AVAILABLE,
                     "JRE does not support Thai dictionary-based BreakIterator")
        analyzer = ThaiAnalyzer(Version.LUCENE_35)

        self._assertAnalyzesTo(analyzer, u"การที่ได้ต้อง the แสดงว่างานดี",
                               [ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
                                 u"ว่า", u"งาน", u"ดี" ],
                               [ 0, 3, 6, 9, 18, 22, 25, 28 ],
                               [ 3, 6, 9, 13, 22, 25, 28, 30 ],
                               None,
                               [ 1, 1, 1, 1, 2, 1, 1, 1 ])

        # case that a stopword is adjacent to thai text, with no whitespace
        self._assertAnalyzesTo(analyzer, u"การที่ได้ต้องthe แสดงว่างานดี",
                               [ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
                                 u"ว่า", u"งาน", u"ดี" ],
                               [ 0, 3, 6, 9, 17, 21, 24, 27 ],
                               [ 3, 6, 9, 13, 21, 24, 27, 29 ],
                               None,
                               [ 1, 1, 1, 1, 2, 1, 1, 1 ])

    def testPositionIncrements30(self):
        analyzer = ThaiAnalyzer(Version.LUCENE_30)

        self._assertAnalyzesTo(analyzer, u"การที่ได้ต้อง the แสดงว่างานดี",
                               [ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
                                 u"ว่า", u"งาน", u"ดี" ],
                               [ 0, 3, 6, 9, 18, 22, 25, 28 ],
                               [ 3, 6, 9, 13, 22, 25, 28, 30 ],
                               None,
                               [ 1, 1, 1, 1, 2, 1, 1, 1 ])

        # case that a stopword is adjacent to thai text, with no whitespace
        self._assertAnalyzesTo(analyzer, u"การที่ได้ต้องthe แสดงว่างานดี",
                               [ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
                                 u"ว่า", u"งาน", u"ดี" ],
                               [ 0, 3, 6, 9, 17, 21, 24, 27 ],
                               [ 3, 6, 9, 13, 21, 24, 27, 29 ],
                               None,
                               [ 1, 1, 1, 1, 2, 1, 1, 1 ])

    def testAnalyzer30(self):
        analyzer = ThaiAnalyzer(Version.LUCENE_30)

        self._assertAnalyzesTo(analyzer, u"", [])

        self._assertAnalyzesTo(analyzer,
                               u"การที่ได้ต้องแสดงว่างานดี",
                               [ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
                                 u"ว่า", u"งาน", u"ดี" ])

        self._assertAnalyzesTo(analyzer,
                               u"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
                               [ u"บริษัท", u"ชื่อ", u"xy&z", u"คุย", u"กับ",
                                 u"xyz@demo.com" ])

        # English stop words
        self._assertAnalyzesTo(analyzer,
                               u"ประโยคว่า The quick brown fox jumped over the lazy dogs",
                               [ u"ประโยค", u"ว่า", u"quick", u"brown", u"fox",
                                 u"jumped", u"over", u"lazy", u"dogs" ])


if __name__ == "__main__":
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    if ThaiWordFilter.DBBI_AVAILABLE:
        if '-loop' in sys.argv:
            sys.argv.remove('-loop')
            while True:
                try:
                    unittest.main()
                except:
                    pass
        else:
            unittest.main()
    else:
        print >>sys.stderr, "Thai not supported by this JVM, tests skipped"
pylucene-4.10.1-1/samples/FacetExample.py000644 000765 000000 00000030052 12356514472 020367 0ustar00vajdawheel000000 000000 
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Author: Thomas Koch
#
# FacetExample.py - a simple Facet example for PyLucene
# (originally based on the Java counterpart from
#  package org.apache.lucene.facet.example.simple
#  later updated to the new Facet API)
# ====================================================================

usage = """
usage: python FacetExample.py [index | simple | drilldown]
  where
    'index'     => create index for faceted search
    'simple'    => run simple faceted search
    'drilldown' => run faceted search with drilldown
"""

INDEX_DIR = "FacetExample.Index"
TAXONOMY_DIR = "FacetExample.Taxonomy"

import os, sys, lucene

from java.io import File
from java.lang import System
from java.text import DecimalFormat
from java.util import Arrays

from org.apache.lucene.util import Version
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.search import IndexSearcher, TermQuery, MatchAllDocsQuery
from org.apache.lucene.store import FSDirectory, SimpleFSDirectory
from org.apache.lucene.index import (IndexWriter, IndexReader, DirectoryReader,
                                     Term, IndexWriterConfig)
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.facet import DrillSideways, DrillDownQuery
from org.apache.lucene.facet import (Facets, FacetField, FacetResult,
                                     FacetsConfig, FacetsCollector)
from org.apache.lucene.facet.taxonomy import FastTaxonomyFacetCounts
from org.apache.lucene.facet.taxonomy.directory import (DirectoryTaxonomyWriter,
                                                        DirectoryTaxonomyReader)

# -----------------------------------------------------------------------------
# SimpleUtils:

# Documents title field
TITLE = "title"
TEXT = "text"

docTexts = [
    "The white car is the one I want.",          # doc nr.0
    "The white dog does not belong to anyone."   # doc nr.1
]

# sample documents titles (for the title field).
docTitles = [
    "white car",   # doc nr.0
    "white dog",   # doc nr.1
]

# Authors: author[n] == Author of n-th document
# example for simple, single-value facet
authors = [
    "Bob",    # doc nr.0
    "Lisa"    # doc nr.1
]

# Categories: categories[D][N] == category-path no. N for document no. D.
# example for hierarchical multi-value facet
categories = [
    [["root", "a", "f1"], ["root", "a", "f2"]],   # doc nr.0
    [["root", "a", "f1"], ["root", "a", "f3"]]    # doc nr.1
]

# samples for (drilldown) search
searchValues = ['white', 'car']
drilldownCategories = [["root", "a", "f1"], ["root", "a", "f2"]]

# -----------------------------------------------------------------------------
# Sample indexer creates an index, and adds to it sample documents and facets.

class SimpleIndexer(object):

    def index(cls, indexDir, taxoDir, facets_config):
        """Create an index, and add to it sample documents and facets.

        indexDir  Directory in which the index should be created.
        taxoDir   Directory in which the taxonomy index should be created.
        """
        # create and open an index writer
        config = IndexWriterConfig(Version.LUCENE_48,
                                   WhitespaceAnalyzer(Version.LUCENE_48))
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        iw = IndexWriter(indexDir, config)
        # create and open a taxonomy writer
        taxo = DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE)

        # loop over sample documents
        nDocsAdded = 0
        nFacetsAdded = 0
        for docNum in range(len(docTexts)):
            # create a plain Lucene document and add some regular Lucene
            # fields to it
            doc = Document()
            doc.add(TextField(TITLE, docTitles[docNum], Field.Store.YES))
            doc.add(TextField(TEXT, docTexts[docNum], Field.Store.NO))
            # obtain the sample facets for current document
            facets = categories[docNum]
            author = authors[docNum]
            # ... and use the FacetField class for adding facet fields to
            # the Lucene document (and via FacetsConfig to the taxonomy index)
            doc.add(FacetField("Author", author))
            for f in facets:
                doc.add(FacetField("Categories", f))
            # finally add the document to the index
            iw.addDocument(facets_config.build(taxo, doc))
            nDocsAdded += 1

        # close the taxonomy index and the index - all modifications are
        # now safely in the provided directories: indexDir and taxoDir.
        iw.close()
        taxo.close()

        print "Indexed %d documents with facets." % nDocsAdded

    index = classmethod(index)


# -----------------------------------------------------------------------------
# SimpleSearcher searches the index with facets.

class SimpleSearcher(object):

    def searchWithFacets(cls, indexReader, taxoReader, facets_config):
        """
        Search an index with facets.

        return a list of FacetResult instances
        """
        # MatchAllDocsQuery is for "browsing" (counts facets for all
        # non-deleted docs in the index)
        query = MatchAllDocsQuery()

        return cls.searchWithQuery(query, indexReader, taxoReader,
                                   facets_config)

    def searchWithTerm(cls, query, indexReader, taxoReader, facets_config):
        """
        Search an index with facets by using a simple term query.

        return a list of FacetResult instances
        """
        query = TermQuery(Term(TEXT, query))

        return cls.searchWithQuery(query, indexReader, taxoReader,
                                   facets_config)

    def searchWithQuery(cls, query, indexReader, taxoReader, facets_config):
        """
        Search an index with facets for a given query.

        return a list of FacetResult instances
        """
        # prepare searcher to search against
        searcher = IndexSearcher(indexReader)
        # create a FacetsCollector to use in our faceted search:
        facets_collector = FacetsCollector()
        FacetsCollector.search(searcher, query, 10, facets_collector)

        # Count both "Categories" and "Author" dimensions
        facets = FastTaxonomyFacetCounts(taxoReader, facets_config,
                                         facets_collector)
        results = []

        facet_result = facets.getTopChildren(10, "Categories")
        if facet_result:
            results.append(facet_result)
            print "Categories: ", facet_result.childCount
            for lv in facet_result.labelValues:
                print "  '%s' (%s)" % (lv.label, lv.value)

        facet_result = facets.getTopChildren(10, "Categories", "root", "a")
        if facet_result:
            results.append(facet_result)
            print "Root-a-Categories: ", facet_result.childCount
            for lv in facet_result.labelValues:
                print "  '%s' (%s)" % (lv.label, lv.value)

        facet_result = facets.getTopChildren(10, "Author")
        if facet_result:
            results.append(facet_result)
            print "Author: ", facet_result.childCount
            for lv in facet_result.labelValues:
                print "  '%s' (%s)" % (lv.label, lv.value)

        return results
return a list of FacetResult instances """ # User drills down on 'Categories' "root/a/f1" and we return facets for 'Author' searcher = IndexSearcher(indexReader) # Passing no baseQuery means we drill down on all documents ("browse only"): query = DrillDownQuery(facets_config) # Now user drills down on Publish Date/2010: query.add("Categories", drilldownCategory) facets_collector = FacetsCollector() FacetsCollector.search(searcher, query, 10, facets_collector) # Retrieve results facets = FastTaxonomyFacetCounts(taxoReader, facets_config, facets_collector) facet_result = facets.getTopChildren(10, "Author") print "Author: ", facet_result.childCount for lv in facet_result.labelValues: print " '%s' (%s)" % (lv.label, lv.value) return facet_result searchWithFacets = classmethod(searchWithFacets) searchWithTerm = classmethod(searchWithTerm) searchWithQuery = classmethod(searchWithQuery) searchWithDrillDown = classmethod(searchWithDrillDown) # ----------------------------------------------------------------------------- class FacetExample(object): def __init__(self, directory): self.directory = directory # create Directories for the search index and for the taxonomy index # in RAM or on Disc #indexDir = RAMDirectory() #taxoDir = RAMDirectory() self.indexDir = FSDirectory.open(File(os.path.join(self.directory, INDEX_DIR))) self.taxoDir = FSDirectory.open(File(os.path.join(self.directory, TAXONOMY_DIR))) # FacetConfig self.facets_config = FacetsConfig() self.facets_config.setHierarchical("Categories", True) self.facets_config.setMultiValued("Categories", True) def createIndex(self): # index the sample documents SimpleIndexer.index(self.indexDir, self.taxoDir, self.facets_config) def runSimple(self): # open readers taxo = DirectoryTaxonomyReader(self.taxoDir) indexReader = DirectoryReader.open(self.indexDir) for term in searchValues: print "\nsearch by term '%s' ..." % term facetRes = SimpleSearcher.searchWithTerm(term, indexReader, taxo, self.facets_config) print "\nsearch all documents ..." 
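# browse mode: searchWithFacets uses a MatchAllDocsQuery, so the facet counts cover every document in the index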
facetRes = SimpleSearcher.searchWithFacets(indexReader, taxo, self.facets_config) # close readers taxo.close() indexReader.close() # return result return facetRes def runDrillDown(self): # open readers taxo = DirectoryTaxonomyReader(self.taxoDir) indexReader = DirectoryReader.open(self.indexDir) for drilldown in drilldownCategories: print "search with drilldown: %s" % '/'.join(drilldown) facetRes = SimpleSearcher.searchWithDrillDown(drilldown, indexReader, taxo, self.facets_config) # close readers taxo.close() indexReader.close() # return result return facetRes def main(cls, argv): baseDir = os.path.dirname(os.path.abspath(argv[0])) if len(argv) > 1: index = simple = drilldown = False for arg in argv[1:]: if arg == "index": index = True elif arg == "simple": simple = True elif arg == "drilldown": drilldown = True else: sys.exit(usage+"\nunknown argument: %s" % arg) else: index = simple = True drilldown = False example = FacetExample(baseDir) if index: example.createIndex() if simple: example.runSimple() if drilldown: example.runDrillDown() main = classmethod(main) if __name__ == '__main__': lucene.initVM(vmargs=['-Djava.awt.headless=true']) FacetExample.main(sys.argv) pylucene-4.10.1-1/samples/IndexFiles.py000644 000765 000000 00000007257 12203673435 020072 0ustar00vajdawheel000000 000000 #!/usr/bin/env python INDEX_DIR = "IndexFiles.index" import sys, os, lucene, threading, time from datetime import datetime from java.io import File from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.document import Document, Field, FieldType from org.apache.lucene.index import FieldInfo, IndexWriter, IndexWriterConfig from org.apache.lucene.store import SimpleFSDirectory from org.apache.lucene.util import Version """ This class is loosely based on the Lucene (java implementation) demo class org.apache.lucene.demo.IndexFiles. It will take a directory as an argument and will index all of the files in that directory and downward recursively. It will index on the file path, the file name and the file contents. The resulting Lucene index will be placed in the current directory and called 'index'. 
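Run it as: python IndexFiles.py <directory-to-index>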
""" class Ticker(object): def __init__(self): self.tick = True def run(self): while self.tick: sys.stdout.write('.') sys.stdout.flush() time.sleep(1.0) class IndexFiles(object): """Usage: python IndexFiles """ def __init__(self, root, storeDir, analyzer): if not os.path.exists(storeDir): os.mkdir(storeDir) store = SimpleFSDirectory(File(storeDir)) analyzer = LimitTokenCountAnalyzer(analyzer, 1048576) config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer) config.setOpenMode(IndexWriterConfig.OpenMode.CREATE) writer = IndexWriter(store, config) self.indexDocs(root, writer) ticker = Ticker() print 'commit index', threading.Thread(target=ticker.run).start() writer.commit() writer.close() ticker.tick = False print 'done' def indexDocs(self, root, writer): t1 = FieldType() t1.setIndexed(True) t1.setStored(True) t1.setTokenized(False) t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS) t2 = FieldType() t2.setIndexed(True) t2.setStored(False) t2.setTokenized(True) t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) for root, dirnames, filenames in os.walk(root): for filename in filenames: if not filename.endswith('.txt'): continue print "adding", filename try: path = os.path.join(root, filename) file = open(path) contents = unicode(file.read(), 'iso-8859-1') file.close() doc = Document() doc.add(Field("name", filename, t1)) doc.add(Field("path", root, t1)) if len(contents) > 0: doc.add(Field("contents", contents, t2)) else: print "warning: no content in %s" % filename writer.addDocument(doc) except Exception, e: print "Failed in indexDocs:", e if __name__ == '__main__': if len(sys.argv) < 2: print IndexFiles.__doc__ sys.exit(1) lucene.initVM(vmargs=['-Djava.awt.headless=true']) print 'lucene', lucene.VERSION start = datetime.now() try: base_dir = os.path.dirname(os.path.abspath(sys.argv[0])) IndexFiles(sys.argv[1], os.path.join(base_dir, INDEX_DIR), StandardAnalyzer(Version.LUCENE_CURRENT)) end = datetime.now() print end - start except Exception, e: print "Failed: ", e raise e pylucene-4.10.1-1/samples/manindex.py000644 000765 000000 00000007770 12203673435 017643 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==================================================================== # # Author: Erik Hatcher # # to index all man pages on $MANPATH or /usr/share/man: # python manindex.py pages # ==================================================================== import os, re, sys, lucene from subprocess import * from java.io import File from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.index import IndexWriter, IndexWriterConfig from org.apache.lucene.document import Document, Field, StringField, TextField from org.apache.lucene.store import SimpleFSDirectory from org.apache.lucene.util import Version def indexDirectory(dir): for name in os.listdir(dir): path = os.path.join(dir, name) if os.path.isfile(path): indexFile(dir, name) def indexFile(dir, filename): path = os.path.join(dir, filename) print " File: ", filename if filename.endswith('.gz'): child = Popen('gunzip -c ' + path + ' | groff -t -e -E -mandoc -Tascii | col -bx', shell=True, stdout=PIPE, cwd=os.path.dirname(dir)).stdout command, section = re.search('^(.*)\.(.*)\.gz$', filename).groups() else: child = Popen('groff -t -e -E -mandoc -Tascii ' + path + ' | col -bx', shell=True, stdout=PIPE, cwd=os.path.dirname(dir)).stdout command, section = re.search('^(.*)\.(.*)$', filename).groups() data = child.read() err = child.close() if err: raise RuntimeError, '%s failed with exit code %d' %(command, err) matches = re.search('^NAME$(.*?)^\S', data, re.MULTILINE | re.DOTALL) name = matches and matches.group(1) or '' matches = re.search('^(?:SYNOPSIS|SYNOPSYS)$(.*?)^\S', data, re.MULTILINE | re.DOTALL) synopsis = matches and matches.group(1) or '' matches = re.search('^(?:DESCRIPTION|OVERVIEW)$(.*?)', data, re.MULTILINE | re.DOTALL) description = matches and matches.group(1) or '' doc = Document() doc.add(Field("command", command, StringField.TYPE_STORED)) doc.add(Field("section", section, StringField.TYPE_STORED)) doc.add(Field("name", name.strip(), TextField.TYPE_STORED)) doc.add(Field("synopsis", synopsis.strip(), TextField.TYPE_STORED)) doc.add(Field("keywords", ' '.join((command, name, synopsis, description)), TextField.TYPE_NOT_STORED)) doc.add(Field("filename", os.path.abspath(path), StringField.TYPE_STORED)) writer.addDocument(doc) if __name__ == '__main__': if len(sys.argv) != 2: print "Usage: python manindex.py " else: lucene.initVM(vmargs=['-Djava.awt.headless=true']) directory = SimpleFSDirectory(File(sys.argv[1])) analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) analyzer = LimitTokenCountAnalyzer(analyzer, 10000) config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer) writer = IndexWriter(directory, config) manpath = os.environ.get('MANPATH', '/usr/share/man').split(os.pathsep) for dir in manpath: print "Crawling", dir for name in os.listdir(dir): path = os.path.join(dir, name) if os.path.isdir(path): indexDirectory(path) writer.commit() writer.close() pylucene-4.10.1-1/samples/mansearch.py000644 000765 000000 00000005550 12203673435 017773 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== # # Author: Erik Hatcher # # to query the index generated with manindex.py # python mansearch.py # by default, the index is stored in 'pages', which can be overriden with # the MANDEX environment variable # ==================================================================== import sys, os, lucene from string import Template from datetime import datetime from getopt import getopt, GetoptError from java.io import File from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.index import DirectoryReader from org.apache.lucene.queryparser.classic import QueryParser from org.apache.lucene.search import IndexSearcher from org.apache.lucene.store import SimpleFSDirectory from org.apache.lucene.util import Version if __name__ == '__main__': lucene.initVM(vmargs=['-Djava.awt.headless=true']) def usage(): print sys.argv[0], "[--format=] [--index=] [--stats] " print "default index is found from MANDEX environment variable" try: options, args = getopt(sys.argv[1:], '', ['format=', 'index=', 'stats']) except GetoptError: usage() sys.exit(2) format = "#name" indexDir = os.environ.get('MANDEX') or 'pages' stats = False for o, a in options: if o == "--format": format = a elif o == "--index": indexDir = a elif o == "--stats": stats = True class CustomTemplate(Template): delimiter = '#' template = CustomTemplate(format) fsDir = SimpleFSDirectory(File(indexDir)) searcher = IndexSearcher(DirectoryReader.open(fsDir)) analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) parser = QueryParser(Version.LUCENE_CURRENT, "keywords", analyzer) parser.setDefaultOperator(QueryParser.Operator.AND) query = parser.parse(' '.join(args)) start = datetime.now() scoreDocs = searcher.search(query, 50).scoreDocs duration = datetime.now() - start if stats: print >>sys.stderr, "Found %d document(s) (in %s) that matched query '%s':" %(len(scoreDocs), duration, query) for scoreDoc in scoreDocs: doc = searcher.doc(scoreDoc.doc) table = dict((field.name(), field.stringValue()) for field in doc.getFields()) print template.substitute(table) pylucene-4.10.1-1/samples/PorterStemmerAnalyzer.py000644 000765 000000 00000005414 12203673435 022347 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== # This sample illustrates how to write an Analyzer 'extension' in Python. # # What is happening behind the scenes ? 
# # The PorterStemmerAnalyzer python class does not in fact extend Analyzer, # it merely provides an implementation for Analyzer's abstract tokenStream() # method. When an instance of PorterStemmerAnalyzer is passed to PyLucene, # with a call to IndexWriter(store, PorterStemmerAnalyzer(), True) for # example, the PyLucene SWIG-based glue code wraps it into an instance of # PythonAnalyzer, a proper java extension of Analyzer which implements a # native tokenStream() method whose job is to call the tokenStream() method # on the python instance it wraps. The PythonAnalyzer instance is the # Analyzer extension bridge to PorterStemmerAnalyzer. import sys, os, lucene from datetime import datetime from IndexFiles import IndexFiles from org.apache.lucene.analysis.core import \ LowerCaseFilter, StopFilter, StopAnalyzer from org.apache.lucene.analysis.en import PorterStemFilter from org.apache.lucene.analysis.standard import \ StandardTokenizer, StandardFilter from org.apache.lucene.util import Version from org.apache.pylucene.analysis import PythonAnalyzer class PorterStemmerAnalyzer(PythonAnalyzer): def createComponents(self, fieldName, reader): source = StandardTokenizer(Version.LUCENE_CURRENT, reader) filter = StandardFilter(Version.LUCENE_CURRENT, source) filter = LowerCaseFilter(Version.LUCENE_CURRENT, filter) filter = PorterStemFilter(filter) filter = StopFilter(Version.LUCENE_CURRENT, filter, StopAnalyzer.ENGLISH_STOP_WORDS_SET) return self.TokenStreamComponents(source, filter) if __name__ == '__main__': if len(sys.argv) < 2: print IndexFiles.__doc__ sys.exit(1) lucene.initVM(vmargs=['-Djava.awt.headless=true']) print 'lucene', lucene.VERSION start = datetime.now() try: IndexFiles(sys.argv[1], "index", PorterStemmerAnalyzer()) end = datetime.now() print end - start except Exception, e: print "Failed: ", e pylucene-4.10.1-1/samples/SearchFiles.py000644 000765 000000 00000003665 12203673435 020227 0ustar00vajdawheel000000 000000 #!/usr/bin/env python INDEX_DIR = "IndexFiles.index" import sys, os, lucene from java.io import File from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.index import DirectoryReader from org.apache.lucene.queryparser.classic import QueryParser from org.apache.lucene.store import SimpleFSDirectory from org.apache.lucene.search import IndexSearcher from org.apache.lucene.util import Version """ This script is loosely based on the Lucene (java implementation) demo class org.apache.lucene.demo.SearchFiles. It will prompt for a search query, then it will search the Lucene index in the current directory called 'index' for the search query entered against the 'contents' field. It will then display the 'path' and 'name' fields for each of the hits it finds in the index. Note that search.close() is currently commented out because it causes a stack overflow in some cases. """ def run(searcher, analyzer): while True: print print "Hit enter with no input to quit." command = raw_input("Query:") if command == '': return print print "Searching for:", command query = QueryParser(Version.LUCENE_CURRENT, "contents", analyzer).parse(command) scoreDocs = searcher.search(query, 50).scoreDocs print "%s total matching documents." 
% len(scoreDocs) for scoreDoc in scoreDocs: doc = searcher.doc(scoreDoc.doc) print 'path:', doc.get("path"), 'name:', doc.get("name") if __name__ == '__main__': lucene.initVM(vmargs=['-Djava.awt.headless=true']) print 'lucene', lucene.VERSION base_dir = os.path.dirname(os.path.abspath(sys.argv[0])) directory = SimpleFSDirectory(File(os.path.join(base_dir, INDEX_DIR))) searcher = IndexSearcher(DirectoryReader.open(directory)) analyzer = StandardAnalyzer(Version.LUCENE_CURRENT) run(searcher, analyzer) del searcher pylucene-4.10.1-1/samples/TermPositionVector.py000644 000765 000000 00000003321 12203673435 021643 0ustar00vajdawheel000000 000000 import lucene from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.store import RAMDirectory from org.apache.lucene.document import Document, Field, FieldType from org.apache.lucene.util import BytesRef, BytesRefIterator, Version from org.apache.lucene.index import \ IndexWriterConfig, IndexWriter, DirectoryReader if __name__ == '__main__': lucene.initVM(vmargs=['-Djava.awt.headless=true']) directory = RAMDirectory() iconfig = IndexWriterConfig(Version.LUCENE_CURRENT, LimitTokenCountAnalyzer(StandardAnalyzer(Version.LUCENE_CURRENT), 100)) iwriter = IndexWriter(directory, iconfig) ft = FieldType() ft.setIndexed(True) ft.setStored(True) ft.setTokenized(True) ft.setStoreTermVectors(True) ft.setStoreTermVectorOffsets(True) ft.setStoreTermVectorPositions(True) ts = ["this bernhard is the text to be index text", "this claudia is the text to be indexed"] for t in ts: doc = Document() doc.add(Field("fieldname", t, ft)) iwriter.addDocument(doc) iwriter.commit() iwriter.close() ireader = DirectoryReader.open(directory) for doc in xrange(0, len(ts)): tv = ireader.getTermVector(doc, "fieldname") termsEnum = tv.iterator(None) for term in BytesRefIterator.cast_(termsEnum): dpEnum = termsEnum.docsAndPositions(None, None) dpEnum.nextDoc() # prime the enum which works only for the current doc freq = dpEnum.freq() print 'term:', term.utf8ToString() print ' freq:', freq for i in xrange(freq): print " pos:", dpEnum.nextPosition() print " off: %i-%i" %(dpEnum.startOffset(), dpEnum.endOffset()) print pylucene-4.10.1-1/samples/ThreadIndexFiles.py000644 000765 000000 00000003057 12203673435 021214 0ustar00vajdawheel000000 000000 # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ==================================================================== # This sample illustrates how to use a thread with PyLucene INDEX_DIR = "ThreadIndexFiles.index" import sys, os, threading, lucene from datetime import datetime from IndexFiles import IndexFiles from org.apache.lucene.analysis.standard import StandardAnalyzer from org.apache.lucene.util import Version if __name__ == '__main__': if len(sys.argv) < 2: print IndexFiles.__doc__ sys.exit(1) env=lucene.initVM(vmargs=['-Djava.awt.headless=true']) print 'lucene', lucene.VERSION def fn(): base_dir = os.path.dirname(os.path.abspath(sys.argv[0])) env.attachCurrentThread() start = datetime.now() IndexFiles(sys.argv[1], os.path.join(base_dir, INDEX_DIR), StandardAnalyzer(Version.LUCENE_CURRENT)) end = datetime.now() print end - start threading.Thread(target=fn).start() pylucene-4.10.1-1/python/collections.py000644 000765 000000 00000024055 12050060702 020211 0ustar00vajdawheel000000 000000 # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from lucene import JArray from java.lang import IllegalStateException, IndexOutOfBoundsException from java.util import NoSuchElementException from org.apache.pylucene.util import \ PythonSet, PythonList, PythonIterator, PythonListIterator class JavaSet(PythonSet): """ This class implements java.util.Set around a Python set instance it wraps. 
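A minimal usage sketch (assumes lucene.initVM() was already called, since
PythonSet is a Java extension class):

    s = JavaSet(set(['a', 'b']))
    s.contains('a')   # True
    s.add('c')        # True, the wrapped Python set now also holds 'c'
    s.size()          # 3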
""" def __init__(self, _set): super(JavaSet, self).__init__() self._set = _set def __contains__(self, obj): return obj in self._set def __len__(self): return len(self._set) def __iter__(self): return iter(self._set) def add(self, obj): if obj not in self._set: self._set.add(obj) return True return False def addAll(self, collection): size = len(self._set) self._set.update(collection) return len(self._set) > size def clear(self): self._set.clear() def contains(self, obj): return obj in self._set def containsAll(self, collection): for obj in collection: if obj not in self._set: return False return True def equals(self, collection): if type(self) is type(collection): return self._set == collection._set return False def isEmpty(self): return len(self._set) == 0 def iterator(self): class _iterator(PythonIterator): def __init__(_self): super(_iterator, _self).__init__() _self._iterator = iter(self._set) def hasNext(_self): if hasattr(_self, '_next'): return True try: _self._next = _self._iterator.next() return True except StopIteration: return False def next(_self): if hasattr(_self, '_next'): next = _self._next del _self._next else: next = _self._iterator.next() return next return _iterator() def remove(self, obj): try: self._set.remove(obj) return True except KeyError: return False def removeAll(self, collection): result = False for obj in collection: try: self._set.remove(obj) result = True except KeyError: pass return result def retainAll(self, collection): result = False for obj in list(self._set): if obj not in collection: self._set.remove(obj) result = True return result def size(self): return len(self._set) def toArray(self): # JavaSet return list(self._set) class JavaListIterator(PythonListIterator): """ This class implements java.util.ListIterator for a Python list instance it wraps. (simple bidirectional iterator) """ def __init__(self, _lst, index=0): super(JavaListIterator, self).__init__() self._lst = _lst self._lastIndex = -1 # keep state for remove/set self.index = index def next(self): if self.index >= len(self._lst): raise JavaError, NoSuchElementException(str(self.index)) result = self._lst[self.index] self._lastIndex = self.index self.index += 1 return result def previous(self): if self.index <= 0: raise JavaError, NoSuchElementException(str(self.index - 1)) self.index -= 1 self._lastIndex = self.index return self._lst[self.index] def hasPrevious(self): return self.index > 0 def hasNext(self): return self.index < len(self._lst) def nextIndex(self): return min(self.index, len(self._lst)) def previousIndex(self): return max(-1, self.index - 1) def add(self, element): """ Inserts the specified element into the list. The element is inserted immediately before the next element that would be returned by next, if any, and after the next element that would be returned by previous, if any. """ if self._lastIndex < 0: raise JavaError, IllegalStateException("add") self._lst.insert(self.index, element) self.index += 1 self._lastIndex = -1 # invalidate state def remove(self): """ Removes from the list the last element that was returned by next or previous. """ if self._lastIndex < 0: raise JavaError, IllegalStateException("remove") del self._lst[self._lastIndex] self._lastIndex = -1 # invalidate state def set(self, element): """ Replaces the last element returned by next or previous with the specified element. 
""" if self._lastIndex < 0: raise JavaError, IllegalStateException("set") self._lst[self._lastIndex] = element def __iter__(self): return self class JavaList(PythonList): """ This class implements java.util.List around a Python list instance it wraps. """ def __init__(self, _lst): super(JavaList, self).__init__() self._lst = _lst def __contains__(self, obj): return obj in self._lst def __len__(self): return len(self._lst) def __iter__(self): return iter(self._lst) def add(self, index, obj): self._lst.insert(index, obj) def addAll(self, collection): size = len(self._lst) self._lst.extend(collection) return len(self._lst) > size def addAll(self, index, collection): size = len(self._lst) self._lst[index:index] = collection return len(self._lst) > size def clear(self): del self._lst[:] def contains(self, obj): return obj in self._lst def containsAll(self, collection): for obj in collection: if obj not in self._lst: return False return True def equals(self, collection): if type(self) is type(collection): return self._lst == collection._lst return False def get(self, index): if index < 0 or index >= self.size(): raise JavaError, IndexOutOfBoundsException(str(index)) return self._lst[index] def indexOf(self, obj): try: return self._lst.index(obj) except ValueError: return -1 def isEmpty(self): return len(self._lst) == 0 def iterator(self): class _iterator(PythonIterator): def __init__(_self): super(_iterator, _self).__init__() _self._iterator = iter(self._lst) def hasNext(_self): if hasattr(_self, '_next'): return True try: _self._next = _self._iterator.next() return True except StopIteration: return False def next(_self): if hasattr(_self, '_next'): next = _self._next del _self._next else: next = _self._iterator.next() return next return _iterator() def lastIndexOf(self, obj): i = len(self._lst)-1 while (i>=0): if obj.equals(self._lst[i]): break i -= 1 return i def listIterator(self, index=0): return JavaListIterator(self._lst, index) def remove(self, obj_or_index): if type(obj_or_index) is type(1): return removeAt(int(obj_or_index)) return removeElement(obj_or_index) def removeAt(self, pos): """ Removes the element at the specified position in this list. 
Note: private method called from Java via remove(int index) index is already checked (or IndexOutOfBoundsException thrown) """ try: el = self._lst[pos] del self._lst[pos] return el except IndexError: # should not happen return None def removeObject(self, obj): """ Removes the first occurrence of the specified object from this list, if it is present """ try: self._lst.remove(obj) return True except ValueError: return False def removeAll(self, collection): result = False for obj in collection: if self.removeElement(obj): result = True return result def retainAll(self, collection): result = False for obj in self._lst: if obj not in collection and self.removeElement(obj): result = True return result def size(self): return len(self._lst) def toArray(self): return self._lst def subListChecked(self, fromIndex, toIndex): """ Note: private method called from Java via subList() from/to index are already checked (or IndexOutOfBoundsException thrown) also IllegalArgumentException is thronw if the endpoint indices are out of order (fromIndex > toIndex) """ sublst = self._lst[fromIndex:toIndex] return JavaList(sublst) def set(self, index, obj): if index < 0 or index >= self.size(): raise JavaError, IndexOutOfBoundsException(str(index)) self._lst[index] = obj pylucene-4.10.1-1/python/ICUFoldingFilter.py000644 000765 000000 00000005067 11364157465 021012 0ustar00vajdawheel000000 000000 # -*- coding: utf-8 -*- # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== # # Port of java/org/apache/lucene/analysis/icu/ICUFoldingFilter.java # using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org) # # A TokenFilter that applies search term folding to Unicode text, # applying foldings from UTR#30 Character Foldings. # # This filter applies the following foldings from the report to unicode text: # # Accent removal # Case folding # Canonical duplicates folding # Dashes folding # Diacritic removal (including stroke, hook, descender) # Greek letterforms folding # Han Radical folding # Hebrew Alternates folding # Jamo folding # Letterforms folding # Math symbol folding # Multigraph Expansions: All # Native digit folding # No-break folding # Overline folding # Positional forms folding # Small forms folding # Space folding # Spacing Accents folding # Subscript folding # Superscript folding # Suzhou Numeral folding # Symbol folding # Underline folding # Vertical forms folding # Width folding # # Additionally, Default Ignorables are removed, and text is normalized to NFKC. # All foldings, case folding, and normalization mappings are applied # recursively to ensure a fully folded and normalized result. 
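#
# A minimal usage sketch (variable names assumed, not part of this file):
# wrap any TokenStream after lucene.initVM(), e.g. one produced by
# StandardTokenizer:
#
#   source = StandardTokenizer(Version.LUCENE_CURRENT, reader)
#   folded = ICUFoldingFilter(source)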
# # ==================================================================== import os, lucene from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter from icu import ResourceBundle, Normalizer2, UNormalizationMode2 utr30 = os.path.join(lucene.__dir__, 'resources', 'org', 'apache', 'lucene', 'analysis', 'icu', 'utr30.dat') ResourceBundle.setAppData("utr30", utr30) class ICUFoldingFilter(ICUNormalizer2Filter): def __init__(self, input): normalizer = Normalizer2.getInstance("utr30", "utr30", UNormalizationMode2.COMPOSE) super(ICUFoldingFilter, self).__init__(input, normalizer) pylucene-4.10.1-1/python/ICUNormalizer2Filter.py000644 000765 000000 00000005624 12016246051 021614 0ustar00vajdawheel000000 000000 # -*- coding: utf-8 -*- # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== # # Port of java/org/apache/lucene/analysis/icu/ICUNormalizer2Filter.java # using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org) # # Normalize token text with ICU's {@link com.ibm.icu.text.Normalizer2} # # With this filter, you can normalize text in the following ways: # - NFKC Normalization, Case Folding, and removing Ignorables (the default) # - Using a standard Normalization mode (NFC, NFD, NFKC, NFKD) # - Based on rules from a custom normalization mapping. # # If you use the defaults, this filter is a simple way to standardize # Unicode text in a language-independent way for search: # - The case folding that it does can be seen as a replacement for # LowerCaseFilter: For example, it handles cases such as the Greek # sigma, so that "Μάϊος" and "ΜΆΪΟΣ" will match correctly. # - The normalization will standardizes different forms of the same # character in Unicode. For example, CJK full-width numbers will be # standardized to their ASCII forms. # - Ignorables such as Zero-Width Joiner and Variation Selectors are # removed. These are typically modifier characters that affect display. 
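#
# A minimal usage sketch (the 'stream' name is assumed): with no explicit
# normalizer argument, the constructor below defaults to ICU's "nfkc_cf"
# instance, i.e. NFKC normalization plus case folding:
#
#   filtered = ICUNormalizer2Filter(stream)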
# # ==================================================================== from icu import Normalizer2, UNormalizationMode2, UNormalizationCheckResult from org.apache.lucene.analysis.tokenattributes import CharTermAttribute from org.apache.pylucene.analysis import PythonTokenFilter class ICUNormalizer2Filter(PythonTokenFilter): def __init__(self, input, normalizer=None): super(ICUNormalizer2Filter, self).__init__(input) self.input = input self.termAtt = self.addAttribute(CharTermAttribute.class_); if normalizer is None: normalizer = Normalizer2.getInstance(None, "nfkc_cf", UNormalizationMode2.COMPOSE) self.normalizer = normalizer def incrementToken(self): if self.input.incrementToken(): text = self.termAtt.toString() if self.normalizer.quickCheck(text) != UNormalizationCheckResult.YES: self.termAtt.setEmpty() self.termAtt.append(self.normalizer.normalize(text)) return True return False pylucene-4.10.1-1/python/ICUTransformFilter.py000644 000765 000000 00000006424 12016246051 021362 0ustar00vajdawheel000000 000000 # -*- coding: utf-8 -*- # ==================================================================== # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ==================================================================== # # Port of java/org/apache/lucene/analysis/icu/ICUTransformFilter.java # using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org) # # A TokenFilter that transforms text with ICU. # # ICU provides text-transformation functionality via its Transliteration API. # Although script conversion is its most common use, a Transliterator can # actually perform a more general class of tasks. In fact, Transliterator # defines a very general API which specifies only that a segment of the input # text is replaced by new text. The particulars of this conversion are # determined entirely by subclasses of Transliterator. # # Some useful transformations for search are built-in: # - Conversion from Traditional to Simplified Chinese characters # - Conversion from Hiragana to Katakana # - Conversion from Fullwidth to Halfwidth forms. # - Script conversions, for example Serbian Cyrillic to Latin # # Example usage:
#   stream = new ICUTransformFilter(stream,
#     Transliterator.getInstance("Traditional-Simplified"));
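#
# In this Python port the equivalent is (a sketch; 'stream' is assumed to
# be an existing TokenStream, and Transliterator comes from PyICU):
#
#   from icu import Transliterator
#   stream = ICUTransformFilter(stream,
#                               Transliterator.createInstance("Traditional-Simplified"))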
# # For more details, see the ICU User Guide at: # http://userguide.icu-project.org/transforms/general # # ==================================================================== from org.apache.pylucene.analysis import PythonTokenFilter from org.apache.lucene.analysis.tokenattributes import CharTermAttribute from icu import UTransPosition class ICUTransformFilter(PythonTokenFilter): # Create a new ICUTransformFilter that transforms text on the given # stream. # # @param input {@link TokenStream} to filter. # @param transform Transliterator to transform the text. def __init__(self, input, transform): super(ICUTransformFilter, self).__init__(input) # Reusable position object self.position = UTransPosition() # term attribute, will be updated with transformed text. self.termAtt = self.addAttribute(CharTermAttribute.class_) self.input = input self.transform = transform def incrementToken(self): if self.input.incrementToken(): text = self.termAtt.toString() length = len(text) self.position.start = 0 self.position.limit = length self.position.contextStart = 0 self.position.contextLimit = length text = self.transform.filteredTransliterate(text, self.position, False) self.termAtt.setEmpty() self.termAtt.append(text) return True return False pylucene-4.10.1-1/java/org/000755 000765 000000 00000000000 12413103672 015511 5ustar00vajdawheel000000 000000 pylucene-4.10.1-1/java/org/apache/000755 000765 000000 00000000000 12413103672 016732 5ustar00vajdawheel000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/000755 000765 000000 00000000000 12413103672 020556 5ustar00vajdawheel000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/analysis/000755 000765 000000 00000000000 12413103672 022401 5ustar00vajdawheel000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/index/000755 000765 000000 00000000000 12413103672 021665 5ustar00vajdawheel000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/queryparser/000755 000765 000000 00000000000 12413103672 023140 5ustar00vajdawheel000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/search/000755 000765 000000 00000000000 12413103672 022023 5ustar00vajdawheel000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/store/000755 000765 000000 00000000000 12413103672 021712 5ustar00vajdawheel000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/util/000755 000765 000000 00000000000 12413103672 021533 5ustar00vajdawheel000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonAttribute.java000644 000765 000000 00000001526 11776645223 025565 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ==================================================================== */ package org.apache.pylucene.util; import org.apache.lucene.util.Attribute; public interface PythonAttribute extends Attribute { } pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonAttributeImpl.java000644 000765 000000 00000002614 11776645223 026406 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ==================================================================== */ package org.apache.pylucene.util; import org.apache.lucene.util.AttributeImpl; public class PythonAttributeImpl extends AttributeImpl { private long pythonObject; public PythonAttributeImpl() { } public void pythonExtension(long pythonObject) { this.pythonObject = pythonObject; } public long pythonExtension() { return this.pythonObject; } public void finalize() throws Throwable { pythonDecRef(); } public native void pythonDecRef(); public native void clear(); public native void copyTo(AttributeImpl target); public native boolean equals(Object obj); public native int hashCode(); } pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonComparable.java000644 000765 000000 00000002337 11140022613 025641 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ==================================================================== */ package org.apache.pylucene.util; public class PythonComparable implements Comparable { private long pythonObject; public PythonComparable() { } public void pythonExtension(long pythonObject) { this.pythonObject = pythonObject; } public long pythonExtension() { return this.pythonObject; } public void finalize() throws Throwable { pythonDecRef(); } public native void pythonDecRef(); public native int compareTo(Object o); } pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonIterator.java000644 000765 000000 00000002556 11140022613 025370 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ==================================================================== */ package org.apache.pylucene.util; import java.util.Iterator; public class PythonIterator implements Iterator { private long pythonObject; public PythonIterator() { } public void pythonExtension(long pythonObject) { this.pythonObject = pythonObject; } public long pythonExtension() { return this.pythonObject; } public void finalize() throws Throwable { pythonDecRef(); } public native void pythonDecRef(); public native boolean hasNext(); public native Object next(); public void remove() { throw new UnsupportedOperationException(); } } pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonList.java000644 000765 000000 00000006624 11776052737 024543 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ==================================================================== */ package org.apache.pylucene.util; import java.util.List; import java.util.ListIterator; import java.util.Collection; import java.util.Iterator; import java.lang.reflect.Array; public class PythonList implements List { private long pythonObject; public PythonList() { } public void pythonExtension(long pythonObject) { this.pythonObject = pythonObject; } public long pythonExtension() { return this.pythonObject; } public void finalize() throws Throwable { pythonDecRef(); } public native void pythonDecRef(); public native boolean add(Object obj); public native void add(int index, Object obj); public native boolean addAll(Collection c); public native boolean addAll(int index, Collection c); public native void clear(); public native boolean contains(Object obj); public native boolean containsAll(Collection c); public native boolean equals(Object obj); public native Object get(int index); // public native int hashCode(); public native int indexOf(Object obj); public native boolean isEmpty(); public native Iterator iterator(); public native int lastIndexOf(Object obj); public native ListIterator listIterator(int index); public ListIterator listIterator() { return listIterator(0); } private native Object removeAt(int index); public Object remove(int index) throws IndexOutOfBoundsException { if (index < 0 || index >= this.size()) throw new IndexOutOfBoundsException(); return removeAt(index); } private native boolean removeObject(Object obj); public boolean remove(Object obj) { return removeObject(obj); } public native boolean removeAll(Collection c); public native boolean retainAll(Collection c); public native Object set(int index, Object obj); public native int size(); private native 
List subListChecked(int fromIndex, int toIndex); public List subList(int fromIndex, int toIndex) throws IndexOutOfBoundsException, IllegalArgumentException { if (fromIndex < 0 || toIndex >= size() || fromIndex > toIndex) throw new IndexOutOfBoundsException(); return subListChecked(fromIndex, toIndex); } public native Object[] toArray(); public Object[] toArray(Object[] a) { Object[] array = toArray(); if (a.length < array.length) a = (Object[]) Array.newInstance(a.getClass().getComponentType(), array.length); System.arraycopy(array, 0, a, 0, array.length); return a; } } pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonListIterator.java000644 000765 000000 00000002177 11776052737 026254 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ==================================================================== */ package org.apache.pylucene.util; import java.util.ListIterator; public class PythonListIterator extends PythonIterator implements ListIterator { public native boolean hasPrevious(); public native Object previous(); public native int nextIndex(); public native int previousIndex(); public native void set(Object obj); public native void add(Object obj); public native void remove(); } pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonSet.java000644 000765 000000 00000004233 11776052737 024355 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ==================================================================== */ package org.apache.pylucene.util; import java.util.Set; import java.util.Collection; import java.util.Iterator; import java.lang.reflect.Array; public class PythonSet implements Set { private long pythonObject; public PythonSet() { } public void pythonExtension(long pythonObject) { this.pythonObject = pythonObject; } public long pythonExtension() { return this.pythonObject; } public void finalize() throws Throwable { pythonDecRef(); } public native void pythonDecRef(); public native boolean add(Object obj); public native boolean addAll(Collection c); public native void clear(); public native boolean contains(Object obj); public native boolean containsAll(Collection c); public native boolean equals(Object obj); public native boolean isEmpty(); public native Iterator iterator(); public native boolean remove(Object obj); public native boolean removeAll(Collection c); public native boolean retainAll(Collection c); public native int size(); public native Object[] toArray(); public Object[] toArray(Object[] a) { Object[] array = toArray(); if (a.length < array.length) a = (Object[]) Array.newInstance(a.getClass().getComponentType(), array.length); System.arraycopy(array, 0, a, 0, array.length); return a; } } pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonDirectory.java000644 000765 000000 00000005203 12223651517 025730 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ==================================================================== */ package org.apache.pylucene.store; import java.io.IOException; import java.util.Collection; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.LockFactory; import org.apache.lucene.store.Lock; public class PythonDirectory extends Directory { private long pythonObject; public PythonDirectory() { } public void pythonExtension(long pythonObject) { this.pythonObject = pythonObject; } public long pythonExtension() { return this.pythonObject; } public void finalize() throws Throwable { pythonDecRef(); } public void sync(Collection names) throws IOException { for (String name : names) sync(name); } public native void pythonDecRef(); public native void close() throws IOException; public native IndexOutput createOutput(String name, IOContext context) throws IOException; public native void deleteFile(String name) throws IOException; public native boolean fileExists(String name) throws IOException; public native long fileLength(String name) throws IOException; public native long fileModified(String name) throws IOException; public native String[] listAll() throws IOException; public native IndexInput openInput(String name, IOContext context) throws IOException; public native void touchFile(String name) throws IOException; public native void sync(String name) throws IOException; public native LockFactory getLockFactory(); public native void setLockFactory(LockFactory lockFactory) throws IOException; public native void clearLock(String name) throws IOException; public native Lock makeLock(String name); } pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonIndexInput.java000644 000765 000000 00000004144 12063232331 026045 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
* ==================================================================== */ package org.apache.pylucene.store; import java.io.IOException; import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.IOContext; public class PythonIndexInput extends BufferedIndexInput { private long pythonObject; public PythonIndexInput(String resourceDesc) { super(resourceDesc); } public PythonIndexInput(String resourceDesc, int bufferSize) { super(resourceDesc, bufferSize); } public PythonIndexInput(String resourceDesc, IOContext context) { super(resourceDesc, context); } public void pythonExtension(long pythonObject) { this.pythonObject = pythonObject; } public long pythonExtension() { return this.pythonObject; } public void finalize() throws Throwable { pythonDecRef(); } public native void pythonDecRef(); public native PythonIndexInput clone(); public native long length(); public native void close() throws IOException; public native byte[] readInternal(int length, long pos) throws IOException; public native void seekInternal(long pos) throws IOException; protected void readInternal(byte[] b, int offset, int length) throws IOException { byte[] data = readInternal(length, getFilePointer()); System.arraycopy(data, 0, b, offset, data.length); } } pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonIndexOutput.java000644 000765 000000 00000003723 12356527510 026263 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * ==================================================================== */ package org.apache.pylucene.store; import java.io.IOException; import org.apache.lucene.store.IndexOutput; public class PythonIndexOutput extends IndexOutput { private long pythonObject; public PythonIndexOutput() { } public void pythonExtension(long pythonObject) { this.pythonObject = pythonObject; } public long pythonExtension() { return this.pythonObject; } public void finalize() throws Throwable { pythonDecRef(); } public native void pythonDecRef(); public void flush() throws IOException {} public native long getFilePointer(); public native long getChecksum() throws IOException; public native void close() throws IOException; public native void writeByte(byte b) throws IOException; public native void writeBytes(byte[] bytes) throws IOException; public void writeBytes(byte[] bytes, int offset, int length) throws IOException { if (offset > 0 || length < bytes.length) { byte[] data = new byte[length]; System.arraycopy(bytes, offset, data, 0, length); writeBytes(data); } else writeBytes(bytes); } } pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonLock.java000644 000765 000000 00000002522 12320050337 024644 0ustar00vajdawheel000000 000000 /* ==================================================================== * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonLock.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.store;

import org.apache.lucene.store.Lock;

public class PythonLock extends Lock {

    private long pythonObject;

    public PythonLock() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native boolean isLocked();
    public native boolean obtain();
    public native void release();
    public native void close();
}

pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonLockFactory.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.store;

import java.io.IOException;

import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;

public class PythonLockFactory extends LockFactory {

    private long pythonObject;

    public PythonLockFactory() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native Lock makeLock(String lockName);
    public native void clearLock(String lockName) throws IOException;
}
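The lock pair can be implemented entirely with Python data structures. A sketch, assuming a process-local set of held lock names; all class names below are hypothetical:

# hedged sketch: process-local locking via a shared Python set
from org.apache.pylucene.store import PythonLock, PythonLockFactory

class SetLock(PythonLock):

    def __init__(self, held, name):
        super(SetLock, self).__init__()
        self.held = held
        self.name = name

    def obtain(self):
        if self.name in self.held:
            return False
        self.held.add(self.name)
        return True

    def isLocked(self):
        return self.name in self.held

    def release(self):
        self.held.discard(self.name)

    def close(self):
        self.release()

class SetLockFactory(PythonLockFactory):

    def __init__(self):
        super(SetLockFactory, self).__init__()
        self.held = set()

    def makeLock(self, name):
        return SetLock(self.held, name)

    def clearLock(self, name):
        self.held.discard(name)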
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonByteParser.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;

/**
 * @author Andi Vajda
 */
public class PythonByteParser implements FieldCache.ByteParser {

    private long pythonObject;

    public PythonByteParser() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native byte parseByte(BytesRef ref);
    public native TermsEnum termsEnum(Terms terms) throws IOException;
}

pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonCollector.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.index.AtomicReaderContext;

public class PythonCollector extends Collector {

    private long pythonObject;

    public PythonCollector() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    protected Scorer scorer;

    public void setScorer(Scorer scorer) throws IOException {
        this.scorer = scorer;
    }

    public void collect(int doc) throws IOException {
        collect(doc, scorer.score());
    }

    public native void pythonDecRef();
    public native void collect(int doc, float score) throws IOException;
    public native void setNextReader(AtomicReaderContext context)
        throws IOException;
    public native boolean acceptsDocsOutOfOrder();
}
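Note how PythonCollector splits Lucene's collect(int) into a Java half that fetches the score and a native collect(doc, score) implemented in Python. A sketch of a collector that records every hit, adjusting doc ids by each segment's docBase; the class name is illustrative:

# hedged sketch: collect all (docid, score) pairs across segments
from org.apache.pylucene.search import PythonCollector

class AllHitsCollector(PythonCollector):

    def __init__(self):
        super(AllHitsCollector, self).__init__()
        self.hits = []
        self.docBase = 0

    def collect(self, doc, score):
        # invoked by the Java collect(int) wrapper above
        self.hits.append((self.docBase + doc, score))

    def setNextReader(self, context):
        self.docBase = context.docBase

    def acceptsDocsOutOfOrder(self):
        return True

Something like searcher.search(query, AllHitsCollector()) would then drive it.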
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonDoubleParser.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;

/**
 * @author Andi Vajda
 */
public class PythonDoubleParser implements FieldCache.DoubleParser {

    private long pythonObject;

    public PythonDoubleParser() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native double parseDouble(BytesRef ref);
    public native TermsEnum termsEnum(Terms terms) throws IOException;
}

pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonFieldComparator.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.index.AtomicReaderContext;

/**
 * @author Andi Vajda
 */
public class PythonFieldComparator<T> extends FieldComparator<T> {

    private long pythonObject;

    public PythonFieldComparator() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native int compare(int slot1, int slot2);
    public native int compareBottom(int doc) throws IOException;
    public native int compareTop(int doc) throws IOException;
    public native void setBottom(final int slot);
    public native void setTopValue(T value);
    public native void copy(int slot, int doc) throws IOException;
    public native FieldComparator<T> setNextReader(AtomicReaderContext context)
        throws IOException;
    public native T value(int slot);
    public native int compareDocToValue(int doc, T value) throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonFieldComparatorSource.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.index.IndexReader;

/**
 * @author Andi Vajda
 */
public class PythonFieldComparatorSource extends FieldComparatorSource {

    private long pythonObject;

    public PythonFieldComparatorSource() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native FieldComparator newComparator(String fieldname, int numHits,
                                                int sortPos, boolean reversed)
        throws IOException;
}

pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonFilter.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.search.Filter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.index.AtomicReaderContext;

public class PythonFilter extends Filter {

    private long pythonObject;

    public PythonFilter() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native DocIdSet getDocIdSet(AtomicReaderContext context,
                                       Bits acceptDocs)
        throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonFloatParser.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;

/**
 * @author Andi Vajda
 */
public class PythonFloatParser implements FieldCache.FloatParser {

    private long pythonObject;

    public PythonFloatParser() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native float parseFloat(BytesRef ref);
    public native TermsEnum termsEnum(Terms terms) throws IOException;
}

pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonIntParser.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;

/**
 * @author Andi Vajda
 */
public class PythonIntParser implements FieldCache.IntParser {

    private long pythonObject;

    public PythonIntParser() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native int parseInt(BytesRef ref);
    public native TermsEnum termsEnum(Terms terms) throws IOException;
}
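All of the FieldCache parser trampolines share the same two-method shape. As an example, a sketch of an int parser that reads indexed terms as hexadecimal (HexIntParser is a hypothetical name), which could then be handed to FieldCache.DEFAULT.getInts():

# hedged sketch: parse indexed terms as hexadecimal integers
from org.apache.pylucene.search import PythonIntParser

class HexIntParser(PythonIntParser):

    def parseInt(self, ref):
        # ref is a BytesRef holding the term's bytes
        return int(ref.utf8ToString(), 16)

    def termsEnum(self, terms):
        return terms.iterator(None)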
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonLongParser.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;

/**
 * @author Andi Vajda
 */
public class PythonLongParser implements FieldCache.LongParser {

    private long pythonObject;

    public PythonLongParser() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native long parseLong(BytesRef ref);
    public native TermsEnum termsEnum(Terms terms) throws IOException;
}

pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonShortParser.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search;

import java.io.IOException;

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;

/**
 * @author Andi Vajda
 */
public class PythonShortParser implements FieldCache.ShortParser {

    private long pythonObject;

    public PythonShortParser() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native short parseShort(BytesRef ref);
    public native TermsEnum termsEnum(Terms terms) throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/spans/PythonSpans.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search.spans;

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.search.spans.Spans;

public class PythonSpans extends Spans {

    private long pythonObject;

    public PythonSpans() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native boolean next() throws IOException;
    public native boolean skipTo(int target) throws IOException;
    public native int doc();
    public native int start();
    public native int end();
    public native Collection<byte[]> getPayload() throws IOException;
    public native boolean isPayloadAvailable() throws IOException;
    public native long cost();
}

pylucene-4.10.1-1/java/org/apache/pylucene/search/similarities/PythonDefaultSimilarity.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search.similarities;

import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.index.FieldInvertState;

public class PythonDefaultSimilarity extends DefaultSimilarity {

    private long pythonObject;

    public PythonDefaultSimilarity() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native float queryNorm(float sumOfSquaredWeights);
    public native float coord(int overlap, int maxOverlap);
    public native float lengthNorm(FieldInvertState state);
    public native float tf(float freq);
    public native float sloppyFreq(int distance);
    public native float idf(long docFreq, long numDocs);
    public native Explanation idfExplain(CollectionStatistics collectionStats,
                                         TermStatistics[] stats);
}
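Every scoring hook of DefaultSimilarity is declared native above, so a Python subclass has to supply all of them. A sketch that mirrors the stock formulas but flattens term frequency to 0/1; the formulas are assumptions modeled on DefaultSimilarity, and the class name is illustrative:

# hedged sketch: DefaultSimilarity with binary term frequency
import math

from org.apache.pylucene.search.similarities import PythonDefaultSimilarity
from org.apache.lucene.search import Explanation

class BinaryTFSimilarity(PythonDefaultSimilarity):

    def queryNorm(self, sumOfSquaredWeights):
        return 1.0 / math.sqrt(sumOfSquaredWeights)

    def coord(self, overlap, maxOverlap):
        return overlap / float(maxOverlap)

    def lengthNorm(self, state):
        return state.getBoost() / math.sqrt(state.getLength())

    def tf(self, freq):
        return 1.0 if freq > 0.0 else 0.0

    def sloppyFreq(self, distance):
        return 1.0 / (distance + 1)

    def idf(self, docFreq, numDocs):
        return math.log(numDocs / float(docFreq + 1)) + 1.0

    def idfExplain(self, collectionStats, termStats):
        idf = sum(self.idf(ts.docFreq(), collectionStats.maxDoc())
                  for ts in termStats)
        return Explanation(idf, "idf")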
pylucene-4.10.1-1/java/org/apache/pylucene/search/highlight/PythonFormatter.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search.highlight;

import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.TokenGroup;

public class PythonFormatter implements Formatter {

    private long pythonObject;

    public PythonFormatter() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native String highlightTerm(String originalText,
                                       TokenGroup tokenGroup);
}

pylucene-4.10.1-1/java/org/apache/pylucene/search/highlight/PythonFragmenter.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.search.highlight;

import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.analysis.TokenStream;

public class PythonFragmenter implements Fragmenter {

    private long pythonObject;

    public PythonFragmenter() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native boolean isNewFragment();
    public native void start(String originalText, TokenStream tokenStream);
}
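The two highlighter hooks above are small single-method interfaces and are easy to extend. A sketch of a formatter that bolds scoring terms (BoldFormatter is a hypothetical name); an instance can be passed wherever a Highlighter expects a Formatter:

# hedged sketch: wrap matched terms in <b> tags
from org.apache.pylucene.search.highlight import PythonFormatter

class BoldFormatter(PythonFormatter):

    def highlightTerm(self, originalText, tokenGroup):
        if tokenGroup.getTotalScore() <= 0.0:
            return originalText
        return '<b>%s</b>' % originalText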
pylucene-4.10.1-1/java/org/apache/pylucene/queryparser/classic/PythonMultiFieldQueryParser.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.queryparser.classic;

import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.Version;

public class PythonMultiFieldQueryParser extends MultiFieldQueryParser {

    private long pythonObject;

    public PythonMultiFieldQueryParser(Version version, String[] fields,
                                       Analyzer analyzer)
    {
        super(version, fields, analyzer);
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native Query getBooleanQuery(List clauses, boolean disableCoord);
    public native Query getFuzzyQuery(String field, String termText,
                                      float minSimilarity);
    public native Query getPrefixQuery(String field, String termText);
    public native Query getRangeQuery(String field, String part1, String part2,
                                      boolean startInclusive,
                                      boolean endInclusive);
    public native Query getWildcardQuery(String field, String termText);
    public native Query getFieldQuery_quoted(String field, String queryText,
                                             boolean quoted);
    public native Query getFieldQuery_slop(String field, String queryText,
                                           int slop);

    public Query getFieldQuery_quoted_super(String field, String queryText,
                                            boolean quoted)
        throws ParseException
    {
        return super.getFieldQuery(field, queryText, quoted);
    }

    public Query getFieldQuery_slop_super(String field, String queryText,
                                          int slop)
        throws ParseException
    {
        return super.getFieldQuery(field, queryText, slop);
    }

    public Query getFieldQuery(String field, String queryText, boolean quoted) {
        return getFieldQuery_quoted(field, queryText, quoted);
    }

    public Query getFieldQuery(String field, String queryText, int slop) {
        return getFieldQuery_slop(field, queryText, slop);
    }
}
pylucene-4.10.1-1/java/org/apache/pylucene/queryparser/classic/PythonQueryParser.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.queryparser.classic;

import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.CharStream;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.Version;

public class PythonQueryParser extends QueryParser {

    private long pythonObject;

    public PythonQueryParser(Version version, String field, Analyzer analyzer) {
        super(version, field, analyzer);
    }

    public PythonQueryParser(CharStream stream) {
        super(stream);
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native Query getBooleanQuery(List clauses, boolean disableCoord);
    public native Query getFuzzyQuery(String field, String termText,
                                      float minSimilarity);
    public native Query getPrefixQuery(String field, String termText);
    public native Query getRangeQuery(String field, String part1, String part2,
                                      boolean startInclusive,
                                      boolean endInclusive);
    public native Query getWildcardQuery(String field, String termText);
    public native Query getFieldQuery_quoted(String field, String queryText,
                                             boolean quoted);
    public native Query getFieldQuery_slop(String field, String queryText,
                                           int slop);

    public Query getFieldQuery_quoted_super(String field, String queryText,
                                            boolean quoted)
        throws ParseException
    {
        return super.getFieldQuery(field, queryText, quoted);
    }

    public Query getFieldQuery_slop_super(String field, String queryText,
                                          int slop)
        throws ParseException
    {
        return super.getFieldQuery(field, queryText, slop);
    }

    public Query getFieldQuery(String field, String queryText, boolean quoted) {
        return getFieldQuery_quoted(field, queryText, quoted);
    }

    public Query getFieldQuery(String field, String queryText, int slop) {
        return getFieldQuery_slop(field, queryText, slop);
    }
}
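Since Python cannot distinguish the two getFieldQuery() overloads by signature, both parsers split them into getFieldQuery_quoted and getFieldQuery_slop, with *_super trampolines reaching the stock behavior. A sketch that caps phrase slop while leaving quoted queries alone; the parser name, field, and analyzer choice are illustrative:

# hedged sketch: a query parser that never allows slop greater than 2
from org.apache.pylucene.queryparser.classic import PythonQueryParser
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.util import Version

class CappedSlopParser(PythonQueryParser):

    def getFieldQuery_quoted(self, field, queryText, quoted):
        return self.getFieldQuery_quoted_super(field, queryText, quoted)

    def getFieldQuery_slop(self, field, queryText, slop):
        return self.getFieldQuery_slop_super(field, queryText, min(slop, 2))

parser = CappedSlopParser(Version.LUCENE_CURRENT, "content",
                          WhitespaceAnalyzer(Version.LUCENE_CURRENT))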
pylucene-4.10.1-1/java/org/apache/pylucene/index/PythonIndexDeletionPolicy.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.index;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;

public class PythonIndexDeletionPolicy extends IndexDeletionPolicy {

    private long pythonObject;

    public PythonIndexDeletionPolicy() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native void onInit(List<? extends IndexCommit> commits)
        throws IOException;
    public native void onCommit(List<? extends IndexCommit> commits)
        throws IOException;
}

pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonAnalyzer.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.analysis;

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;

public class PythonAnalyzer extends Analyzer {

    private long pythonObject;

    public PythonAnalyzer() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native TokenStreamComponents createComponents(final String fieldName,
                                                         final Reader reader);
}
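PythonAnalyzer is the most commonly extended of these classes: a subclass only has to build the tokenizer/filter chain. A sketch of a lowercasing whitespace analyzer; the class name is illustrative, though the pattern matches the unit tests shipped with PyLucene:

# hedged sketch: whitespace tokenization followed by lowercasing
from org.apache.pylucene.analysis import PythonAnalyzer
from org.apache.lucene.analysis import Analyzer
from org.apache.lucene.analysis.core import LowerCaseFilter, WhitespaceTokenizer
from org.apache.lucene.util import Version

class LowercaseWhitespaceAnalyzer(PythonAnalyzer):

    def createComponents(self, fieldName, reader):
        source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
        result = LowerCaseFilter(Version.LUCENE_CURRENT, source)
        return Analyzer.TokenStreamComponents(source, result)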
pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonCharTokenizer.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.analysis;

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.Version;

public class PythonCharTokenizer extends CharTokenizer {

    private long pythonObject;

    public PythonCharTokenizer(Version version, Reader reader) {
        super(version, reader);
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native boolean isTokenChar(int c);
    public native int normalize(int c);
}

pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonFilteringTokenFilter.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.analysis;

import java.io.IOException;

import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;

public class PythonFilteringTokenFilter extends FilteringTokenFilter {

    private long pythonObject;

    public PythonFilteringTokenFilter(Version version, TokenStream tokenStream) {
        super(version, tokenStream);
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native boolean accept() throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonTokenFilter.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.analysis;

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;

public class PythonTokenFilter extends TokenFilter {

    private long pythonObject;

    public PythonTokenFilter(TokenStream tokenStream) {
        super(tokenStream);
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native boolean incrementToken() throws IOException;
}

pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonTokenizer.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.analysis;

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.Token;

public class PythonTokenizer extends Tokenizer {

    private long pythonObject;

    public PythonTokenizer(Reader reader) {
        super(reader);
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native boolean incrementToken() throws IOException;
}
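A PythonTokenFilter subclass drives the wrapped stream itself and rewrites attributes in place. A sketch that uppercases every token; the class name is illustrative and self.input is simply a Python-side reference to the wrapped stream:

# hedged sketch: a TokenFilter that uppercases each term
from org.apache.pylucene.analysis import PythonTokenFilter
from org.apache.lucene.analysis.tokenattributes import CharTermAttribute

class UpperCaseFilter(PythonTokenFilter):

    def __init__(self, tokenStream):
        super(UpperCaseFilter, self).__init__(tokenStream)
        self.input = tokenStream
        self.termAtt = self.addAttribute(CharTermAttribute.class_)

    def incrementToken(self):
        if not self.input.incrementToken():
            return False
        text = self.termAtt.toString().upper()
        self.termAtt.setEmpty()
        self.termAtt.append(text)
        return True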
pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonTokenStream.java

/* ====================================================================
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * ==================================================================== */

package org.apache.pylucene.analysis;

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;

public class PythonTokenStream extends TokenStream {

    private long pythonObject;

    public PythonTokenStream() {
    }

    public void pythonExtension(long pythonObject) {
        this.pythonObject = pythonObject;
    }

    public long pythonExtension() {
        return this.pythonObject;
    }

    public void finalize() throws Throwable {
        pythonDecRef();
    }

    public native void pythonDecRef();
    public native boolean incrementToken() throws IOException;
    public native void end() throws IOException;
    public native void reset() throws IOException;
    public native void close() throws IOException;
}