pylucene-4.10.1-1/

pylucene-4.10.1-1/CHANGES

Version 4.9.0 -> 4.10.1
-----------------------
- using Lucene 4.10.1 sources
- PyLucene built with JCC 2.21
Version 4.8.0 -> 4.9.0
----------------------
- using Lucene 4.9.0 sources
- FacetSample.py fixed to work with Lucene 4.9 facets API (Thomas Koch)
- PyLucene built with JCC 2.20
Version 4.7.2 -> 4.8.0
----------------------
- using Lucene 4.8.0 sources
- PyLucene built with JCC 2.19
- Lucene now requires Java 7 at the minimum, Java 6 is no longer supported
Version 4.6.1 -> 4.7.2
----------------------
- using Lucene 4.7.2 sources
- PyLucene built with JCC 2.19
Version 4.5.1 -> 4.6.1
----------------------
- using Lucene 4.6.1 sources
- PyLucene built with JCC 2.19
Version 4.4.0 -> 4.5.1
----------------------
- using Lucene 4.5.1 sources
- PyLucene built with JCC 2.18
Version 4.3.0 -> 4.4.0
----------------------
- added vmargs=['-Djava.awt.headless=true'] to all initVM() calls
- using Lucene 4.4.0 sources
- added wrapping of Polish analyzer and stemmer
- added inclusion of misc.jar because of cross-dependencies
- PyLucene built with JCC 2.17
Version 3.6.2 -> 4.3.0
----------------------
- switched build to --use_full_names, Python wrappers now follow Java packages (see the sketch after this list)
- removed all --rename and most --exclude entries from jcc command line
- removed Lucene in Action samples as they're incompatible with the 4.x API
- migrated all unit tests and remaining samples to Lucene 4.x API
- migrated FacetExample.py to latest 4.x facets API (Thomas Koch)
- PyLucene built with JCC 2.16
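
  With --use_full_names, imports follow the Java package layout; a minimal
  sketch (class names as used in the bundled tests):

    import lucene
    lucene.initVM()
    # wrappers now live under their Java package paths
    from org.apache.lucene.index import IndexWriter  # was: from lucene import IndexWriter
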
Version 3.6.1 -> 3.6.2
----------------------
- using Lucene 3.6.2 sources
- PyLucene built with JCC 2.15
Version 3.6.0 -> 3.6.1
----------------------
- using Lucene 3.6.1 sources
- PyLucene built with JCC 2.14
Version 3.5.0 -> 3.6.0
----------------------
- using Lucene 3.6.0 sources
- renamed classes whose Python name would not be unique in the lucene module
- refreshed Linux build options, added an OpenJDK 7 example
- added JavaList to collections.py, a Python java.util.List (Thomas Koch)
- added samples/FacetExample.py (Thomas Koch)
- PyLucene built with JCC 2.13
Version 3.4 -> 3.5.0
--------------------
- using Lucene 3.5 sources
- added facet contrib module to build
- refreshed SynonymAnalyzerViewer sample and wordnet index (Thomas Koch)
- added PythonReusableAnalyzerBase (Michael McCandless)
- added PythonIndexDeletionPolicy.java (Michael McCandless)
- added spellchecker contrib module to build
- PyLucene built with JCC 2.12
Version 3.3 -> 3.4
------------------
- added new join contrib module to build
- PyLucene built with JCC 2.11
Version 3.2 -> 3.3
------------------
- using Lucene 3.3 sources
- adapted to FieldComparator becoming generic
- added new grouping contrib module to build
- PyLucene built with JCC 2.10
Version 3.1.0 -> 3.2
--------------------
- using Lucene 3.2 sources
- PyLucene built with JCC 2.9
- rearranged Lucene source checkout tree to reflect new constraints
Version 3.0.0 -> 3.1.0
----------------------
- using Lucene 3.1 sources
- improved support for building on Windows with mingw32
- added wininst target to Makefile
- added port of ICUNormalizer2Filter using C++ ICU's Normalizer2 via PyICU 1.1
- added port of ICUFoldingFilter using C++ ICU's Normalizer2 via PyICU 1.1
- added port of ICUTransformFilter using C++ ICU's Transliterator via PyICU 1.1
- fixed "Lucene in Action" samples left over on old API
- improved support for adding optional contrib modules
- added --package java.util.regex to wrap constructors on PatternAnalyzer
- fixed mansearch.py sample to reflect API changes
- PyLucene built with JCC 2.8
Version 2.9.0 -> 3.0.0
----------------------
- unit tests ported to new API
- removed InstantiatedIndex contrib from default build
- with JCC 2.5's Java generics support, a lot less downcasting needed
- Java Lucene sources now included in PyLucene source distribution
- "Lucene in Action" samples and tests converted to new Lucene 3.0 API
- PyLucene built with JCC 2.5
Version 2.4.1 -> 2.9.0
----------------------
- renamed the Highlighter's SpanScorer class to HighlighterSpanScorer
- fixed bug in Makefile's test target which tested installed build
- added Mac OS X 10.6 sections to Makefile
- added FieldCache.Parser Python extension classes (used in test/test_Sort.py)
- added FieldComparator and FieldComparatorSource Python extension classes
- added 'memory' contrib module to default build
- PyLucene built with JCC 2.4
Version 2.4.0 -> 2.4.1
----------------------
- PyLucene with JCC now a subproject of the Apache Lucene project
- documentation moved to http://lucene.apache.org/pylucene
- added java.util.Arrays to the build to bridge the Java array/collection gap
- added collections.py module with JavaSet class, a Python java.util.Set
- fixed bug in PythonQueryParser overriding wrong method (Aaron Lav)
- PyLucene built with JCC 2.2
- fixed bug with collections.py shadowing Python 2.6's collections module during build
- passing strings for byte[] or char[] is no longer supported, use JArray (see the sketch after this list)
- added copy of PyLucene web site to distribution for offline viewing
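
  A minimal sketch of the JArray calling convention (cf. test/test_Binary.py):

    from lucene import JArray
    # a Java byte[] must now be passed as a JArray, not a str
    data = JArray('byte')([66, 90, 104, 57])
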
Version 2.3.2 -> 2.4.0
----------------------
- fixed Debian bug http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=499599
- arrays are now wrapped with JArray() instances instead of expanded into lists
- return by value in arrays now supported
- PythonTermDocs removed since arrays can now receive values
- PythonReader removed since arrays now wrapped
- added InstantiatedIndex contrib to build
- PyLucene built with JCC 2.1
Version 2.3.1 -> 2.3.2
----------------------
- fixed code generation for clone() broken by finalization proxy work
- added 'union' and 'NULL' to the list of reserved words
- fixed castCheck() to work with finalization proxies
- added scorePayload() delegator to PythonSimilarityDelegator
- added support for --install-dir and --use-distutils options
- added support for INSTALL_OPT to Makefile
- fixed basic samples to initialize VM
- added bdist target to Makefile
Version 2.3 -> 2.3.1
--------------------
- fixed bug in JCC using the wrong field modifiers for setter (Bill Janssen)
- added missing calls for generating wrappers for ancestors of Exception
- added missing call for generating wrappers for String
- added PythonTokenizer for implementing complete tokenizers in Python
Version 2.2 -> 2.3
------------------
- PyLucene with JCC introduced
- added support for Python 2.3.5
- added support for using clone() with extensions
- renamed decRef() (and incRef()) native extensions method to pythonDecRef()
- improved error reporting a bit
- JCC now generates Python properties for get/set/is methods
- fixed bug in generated code invoking parent method when inherited from above
- added support for building on 64-bit Linux (Ubuntu 7.10)
- added support for implicitly iterable Enumeration
- added support for --root and --prefix for jcc invocations (Esteve Fernandez)
- jcc switched to setuptools by default (and fallback on distutils)
- fixed bug http://bugzilla.osafoundation.org/show_bug.cgi?id=11643
- added support for automatic boxing of primitives when Object is expected
- fixed bug in missing extensions' Iterator and Enumeration methods
- added JavaSet.py sample using PythonSet and PythonIterator extensions
- added missing LICENSE files
- fixed memory leak when calling inherited methods via callSuper()
- made finalize() method public on extensions for manually breaking ref cycle
- added support for building on Solaris with Sun Studio C++ (Solaris 11)
- fixed leak of local refs of jstring when converting to an array of String
- automated finalization of extensions via proxy for breaking ref cycle
- added Py_CLEAR and Py_VISIT macros for Python 2.3.5 compilation
pylucene-4.10.1-1/CREDITS

PyLucene is a JCC-compiled Python extension of Java Lucene and wouldn't be
possible without the tireless efforts of the people and open source projects
below.
- the Apache Lucene developers,
http://lucene.apache.org/java/docs/whoweare.html
- the Open Source Applications Foundation, for hosting the project from
2004 to 2008: http://www.osafoundation.org
- Andi Vajda, PyLucene and JCC project founder and maintainer, for
believing that PyLucene should be feasible
- the following people contributed patches, samples, bug reports
and resources:
. Kapil Thangavelu (hazmat): FSDirectory support, first unit test
. Frank Wierzbicki: IndexFiles.py and SearchFiles.py samples
. Andreas Jung: several bug reports, nasty bugs indeed
. Jeff Bowden: several bug reports and API additions via patches
. Wai Yip Tung: test_PyLuceneThread.py unit test, windows threads testing
. Yura Smolsky: test_Highlighter.py unit test, numerous bug reports
. Steve Jenson: MultiFieldQueryParser addition to test_PyLucene.py
. Erik Hatcher: man page index and search samples
. Bill Janssen: many bug reports and 'shared mode' suggestion
. Aaron Lav: several memory leaks, fixed with patches and tests
. Grant Ingersoll: for inviting and sponsoring PyLucene's move to Apache
Thank you all!
pylucene-4.10.1-1/extensions.xml
pylucene-4.10.1-1/INSTALL

Please see http://lucene.apache.org/pylucene/install.html
pylucene-4.10.1-1/java/

pylucene-4.10.1-1/LICENSE

Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
pylucene-4.10.1-1/Makefile

# Makefile for building PyLucene
#
# Supported operating systems: Mac OS X, Linux and Windows.
# See INSTALL file for requirements.
# See jcc/INSTALL for information about --shared.
#
# Steps to build
# 1. Edit the sections below as documented
# 2. Edit the JARS variable to add optional contrib modules not defaulted
# 3. make
# 4. make install
#
# The install target installs the lucene python extension in python's
# site-packages directory.
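#
# Alternate locations can be passed through to jcc via INSTALL_OPT, e.g.
# (the path below is only an example):
#   make install INSTALL_OPT='--install-dir /tmp/pylucene'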
#
VERSION=4.10.1-1
LUCENE_SVN_VER=HEAD
LUCENE_VER=4.10.1
LUCENE_SVN=http://svn.apache.org/repos/asf/lucene/dev/tags/lucene_solr_4_10_1
PYLUCENE:=$(shell pwd)
LUCENE_SRC=lucene-java-$(LUCENE_VER)
LUCENE=$(LUCENE_SRC)/lucene
#
# You need to uncomment and edit the variables below in the section
# corresponding to your operating system.
#
# Windows drive-absolute paths need to be expressed cygwin style.
#
# PREFIX: where programs are normally installed on your system (Unix).
# PREFIX_PYTHON: where your version of python is installed.
# JCC: how jcc is invoked, depending on the python version:
# - python 2.7:
# $(PYTHON) -m jcc
# - python 2.6:
# $(PYTHON) -m jcc.__main__
# - python 2.5:
# $(PYTHON) -m jcc.__init__
# - python 2.4:
# $(PYTHON) $(PREFIX_PYTHON)/lib/python2.4/site-packages/jcc/__init__.py
# NUM_FILES is the number of wrapper files to generate. By default, jcc
# generates all C++ classes into one single file. This may exceed a compiler
# limit.
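# All platform examples below use NUM_FILES=8, i.e. jcc is asked to split
# the generated wrapper sources into 8 files.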
#
# Mac OS X 10.6 (64-bit Python 2.6, Java 1.6)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64
#NUM_FILES=8
# Mac OS X 10.6 (MacPorts 1.8.0 64-bit Python 2.7, Java 1.6)
#PREFIX_PYTHON=/opt/local
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc --shared --arch x86_64
#NUM_FILES=8
# Mac OS X 10.6 (64-bit and 32-bit Python 2.6 together, Java 1.6)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc.__main__ --shared --arch x86_64 --arch i386
#NUM_FILES=8
# Mac OS X 10.5 (32-bit Python 2.5, Java 1.5)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc --shared
#NUM_FILES=8
# Mac OS X (Python 2.3.5, Java 1.5, setuptools 0.6c7, Intel Mac OS X 10.4)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/site-packages/JCC-2.3-py2.3-macosx-10.4-i386.egg/jcc/__init__.py
#NUM_FILES=8
# Mac OS X (Python 2.3.5, Java 1.5, setuptools 0.6c7, PPC Mac OS X 10.4)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) /System/Library/Frameworks/Python.framework/Versions/2.3/lib/python2.3/site-packages/JCC-2.3-py2.3-macosx-10.4-ppc.egg/jcc/__init__.py
#NUM_FILES=8
# Linux (Ubuntu 11.10 64-bit, Python 2.7.2, OpenJDK 1.7, setuptools 0.6.16)
# Be sure to also set JDK['linux2'] in jcc's setup.py to the JAVA_HOME value
# used below for ANT (and rebuild jcc after changing it).
#PREFIX_PYTHON=/usr
#ANT=JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64 /usr/bin/ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc --shared
#NUM_FILES=8
# Linux (Ubuntu 8.10 64-bit, Python 2.5.2, OpenJDK 1.6, setuptools 0.6c9)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc --shared
#NUM_FILES=8
# Linux (Ubuntu 6.06, Python 2.4, Java 1.5, no setuptools)
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) $(PREFIX_PYTHON)/lib/python2.4/site-packages/jcc/__init__.py
#NUM_FILES=8
# FreeBSD
#PREFIX_PYTHON=/usr
#ANT=ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) -m jcc
#NUM_FILES=8
# Solaris (Solaris 11, Python 2.4 32-bit, Sun Studio 12, Java 1.6)
#PREFIX_PYTHON=/usr
#ANT=/usr/local/apache-ant-1.7.0/bin/ant
#PYTHON=$(PREFIX_PYTHON)/bin/python
#JCC=$(PYTHON) $(PREFIX_PYTHON)/lib/python2.4/site-packages/jcc/__init__.py
#NUM_FILES=8
# Windows (Win32, Python 2.5.1, Java 1.6, ant 1.7.0)
#PREFIX_PYTHON=/cygdrive/o/Python-2.5.2/PCbuild
#ANT=JAVA_HOME=o:\\Java\\jdk1.6.0_02 /cygdrive/o/java/apache-ant-1.7.0/bin/ant
#PYTHON=$(PREFIX_PYTHON)/python.exe
#JCC=$(PYTHON) -m jcc --shared
#NUM_FILES=8
# Windows (Win32, msys/MinGW, Python 2.6.4, Java 1.6, ant 1.7.1 (WinAnt))
#PREFIX_PYTHON=/c/Python26
#ANT=JAVA_HOME="c:\\Program Files\\Java\\jdk1.6.0_18" "/c/Program Files/WinAnt/bin/ant"
#PYTHON=$(PREFIX_PYTHON)/python.exe
#JCC=$(PYTHON) -m jcc.__main__ --shared --compiler mingw32
#NUM_FILES=8
# Windows (Win32, Python 2.7, Java 1.6, ant 1.8.1, Java not on PATH)
#PREFIX_PYTHON=/cygdrive/c/Python27
#ANT=JAVA_HOME=c:\\jdk1.6.0_22 /cygdrive/c/java/apache-ant-1.8.1/bin/ant
#PYTHON=$(PREFIX_PYTHON)/python.exe
#JCC=$(PYTHON) -m jcc --shared --find-jvm-dll
#NUM_FILES=8
JARS=$(LUCENE_JAR)
# comment/uncomment the desired/undesired optional contrib modules below
JARS+=$(ANALYZERS_JAR) # many language analyzers
JARS+=$(MEMORY_JAR) # single-document memory index
JARS+=$(HIGHLIGHTER_JAR) # needs memory contrib
JARS+=$(EXTENSIONS_JAR) # needs highlighter contrib
JARS+=$(QUERIES_JAR) # regex and other contrib queries
JARS+=$(QUERYPARSER_JAR) # query parser
JARS+=$(SANDBOX_JAR) # needed by query parser
#JARS+=$(SMARTCN_JAR) # smart Chinese analyzer
JARS+=$(STEMPEL_JAR) # Polish analyzer and stemmer
#JARS+=$(SPATIAL_JAR) # spatial lucene
JARS+=$(GROUPING_JAR) # grouping module
JARS+=$(JOIN_JAR) # join module
JARS+=$(FACET_JAR) # facet module
JARS+=$(SUGGEST_JAR) # suggest/spell module
JARS+=$(EXPRESSIONS_JAR) # expressions module
#
# No edits required below
#
SVNOP?=export
ifeq ($(DEBUG),1)
DEBUG_OPT=--debug
endif
DEFINES=-DPYLUCENE_VER="\"$(VERSION)\"" -DLUCENE_VER="\"$(LUCENE_VER)\""
LUCENE_JAR=$(LUCENE)/build/core/lucene-core-$(LUCENE_VER).jar
ANALYZERS_JAR=$(LUCENE)/build/analysis/common/lucene-analyzers-common-$(LUCENE_VER).jar
HIGHLIGHTER_JAR=$(LUCENE)/build/highlighter/lucene-highlighter-$(LUCENE_VER).jar
MEMORY_JAR=$(LUCENE)/build/memory/lucene-memory-$(LUCENE_VER).jar
EXTENSIONS_JAR=build/jar/extensions.jar
QUERIES_JAR=$(LUCENE)/build/queries/lucene-queries-$(LUCENE_VER).jar
QUERYPARSER_JAR=$(LUCENE)/build/queryparser/lucene-queryparser-$(LUCENE_VER).jar
SANDBOX_JAR=$(LUCENE)/build/sandbox/lucene-sandbox-$(LUCENE_VER).jar
SMARTCN_JAR=$(LUCENE)/build/analysis/smartcn/lucene-analyzers-smartcn-$(LUCENE_VER).jar
STEMPEL_JAR=$(LUCENE)/build/analysis/stempel/lucene-analyzers-stempel-$(LUCENE_VER).jar
SPATIAL_JAR=$(LUCENE)/build/spatial/lucene-spatial-$(LUCENE_VER).jar
GROUPING_JAR=$(LUCENE)/build/grouping/lucene-grouping-$(LUCENE_VER).jar
JOIN_JAR=$(LUCENE)/build/join/lucene-join-$(LUCENE_VER).jar
FACET_JAR=$(LUCENE)/build/facet/lucene-facet-$(LUCENE_VER).jar
SUGGEST_JAR=$(LUCENE)/build/suggest/lucene-suggest-$(LUCENE_VER).jar
EXPRESSIONS_JAR=$(LUCENE)/build/expressions/lucene-expressions-$(LUCENE_VER).jar
MISC_JAR=$(LUCENE)/build/misc/lucene-misc-$(LUCENE_VER).jar
ANTLR_JAR=$(LUCENE)/expressions/lib/antlr-runtime-3.5.jar
ASM_JAR=$(LUCENE)/expressions/lib/asm-4.1.jar
ASM_COMMONS_JAR=$(LUCENE)/expressions/lib/asm-commons-4.1.jar
ICUPKG:=$(shell which icupkg)
.PHONY: generate compile install default all clean realclean \
sources ivy test jars distrib
default: all
$(LUCENE_SRC):
svn $(SVNOP) --depth files -r $(LUCENE_SVN_VER) $(LUCENE_SVN) $(LUCENE_SRC)
svn $(SVNOP) -r $(LUCENE_SVN_VER) $(LUCENE_SVN)/lucene $(LUCENE_SRC)/lucene
sources: $(LUCENE_SRC)
ivy:
ifeq ($(ANT),)
$(error ANT is not defined, please edit Makefile as required at top)
else ifeq ($(PYTHON),)
$(error PYTHON is not defined, please edit Makefile as required at top)
else ifeq ($(JCC),)
$(error JCC is not defined, please edit Makefile as required at top)
else ifeq ($(NUM_FILES),)
$(error NUM_FILES is not defined, please edit Makefile as required at top)
endif
cd $(LUCENE); ($(ANT) ivy-availability-check || $(ANT) ivy-bootstrap)
to-orig: sources
mkdir -p $(LUCENE)-orig
tar -C $(LUCENE) -cf - . | tar -C $(LUCENE)-orig -xvf -
from-orig: $(LUCENE)-orig
mkdir -p $(LUCENE)
tar -C $(LUCENE)-orig -cf - . | tar -C $(LUCENE) -xvf -
lucene:
rm -f $(LUCENE_JAR)
$(MAKE) $(LUCENE_JAR)
$(LUCENE_JAR): $(LUCENE)
cd $(LUCENE); $(ANT) -Dversion=$(LUCENE_VER)
$(ANALYZERS_JAR): $(LUCENE_JAR)
cd $(LUCENE)/analysis; $(ANT) -Dversion=$(LUCENE_VER) compile
$(MEMORY_JAR): $(LUCENE_JAR)
cd $(LUCENE)/memory; $(ANT) -Dversion=$(LUCENE_VER)
$(HIGHLIGHTER_JAR): $(LUCENE_JAR)
cd $(LUCENE)/highlighter; $(ANT) -Dversion=$(LUCENE_VER)
$(QUERIES_JAR): $(LUCENE_JAR)
cd $(LUCENE)/queries; $(ANT) -Dversion=$(LUCENE_VER)
$(QUERYPARSER_JAR): $(LUCENE_JAR)
cd $(LUCENE)/queryparser; $(ANT) -Dversion=$(LUCENE_VER)
$(SANDBOX_JAR): $(LUCENE_JAR)
cd $(LUCENE)/sandbox; $(ANT) -Dversion=$(LUCENE_VER)
$(EXTENSIONS_JAR): $(LUCENE_JAR)
$(ANT) -f extensions.xml -Dlucene.dir=$(LUCENE_SRC)
$(SMARTCN_JAR): $(LUCENE_JAR)
cd $(LUCENE)/analysis/smartcn; $(ANT) -Dversion=$(LUCENE_VER)
$(STEMPEL_JAR): $(LUCENE_JAR)
cd $(LUCENE)/analysis/stempel; $(ANT) -Dversion=$(LUCENE_VER)
$(SPATIAL_JAR): $(LUCENE_JAR)
cd $(LUCENE)/spatial; $(ANT) -Dversion=$(LUCENE_VER)
$(GROUPING_JAR): $(LUCENE_JAR)
cd $(LUCENE)/grouping; $(ANT) -Dversion=$(LUCENE_VER)
$(JOIN_JAR): $(LUCENE_JAR)
cd $(LUCENE)/join; $(ANT) -Dversion=$(LUCENE_VER)
$(FACET_JAR): $(LUCENE_JAR)
cd $(LUCENE)/facet; $(ANT) -Dversion=$(LUCENE_VER)
$(SUGGEST_JAR): $(LUCENE_JAR)
cd $(LUCENE)/suggest; $(ANT) -Dversion=$(LUCENE_VER)
$(EXPRESSIONS_JAR): $(LUCENE_JAR)
cd $(LUCENE)/expressions; $(ANT) -Dversion=$(LUCENE_VER)
$(MISC_JAR): $(LUCENE_JAR)
cd $(LUCENE)/misc; $(ANT) -Dversion=$(LUCENE_VER)
JCCFLAGS?=
jars: $(JARS) $(MISC_JAR) $(ANTLR_JAR) $(ASM_JAR) $(ASM_COMMONS_JAR)
ifneq ($(ICUPKG),)
ICURES= $(LUCENE)/analysis/icu/src/resources
RESOURCES=--resources $(ICURES)
ifneq ($(PYTHON),)
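# ask $(PYTHON) whether this host is big-endian ('b') or little-endian ('l'),
# the byte order flag passed to icupkg --type below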
ENDIANNESS:=$(shell $(PYTHON) -c "import struct; print struct.pack('h', 1) == '\000\001' and 'b' or 'l'")
endif
resources: $(ICURES)/org/apache/lucene/analysis/icu/utr30.dat
$(ICURES)/org/apache/lucene/analysis/icu/utr30.dat: $(ICURES)/org/apache/lucene/analysis/icu/utr30.nrm
rm -f $@
cd $(dir $<); $(ICUPKG) --type $(ENDIANNESS) --add $(notdir $<) new $(notdir $@)
else
RESOURCES=
resources:
@echo ICU not installed
endif
GENERATE=$(JCC) $(foreach jar,$(JARS),--jar $(jar)) \
$(JCCFLAGS) --use_full_names \
--include $(MISC_JAR) \
--include $(ANTLR_JAR) \
--include $(ASM_JAR) \
--include $(ASM_COMMONS_JAR) \
--package java.lang java.lang.System \
java.lang.Runtime \
--package java.util java.util.Arrays \
java.util.Collections \
java.util.HashMap \
java.util.HashSet \
java.util.TreeSet \
java.lang.IllegalStateException \
java.lang.IndexOutOfBoundsException \
java.util.NoSuchElementException \
java.text.SimpleDateFormat \
java.text.DecimalFormat \
java.text.Collator \
--package java.util.concurrent java.util.concurrent.Executors \
--package java.util.regex \
--package java.io java.io.StringReader \
java.io.InputStreamReader \
java.io.FileInputStream \
java.io.DataInputStream \
--exclude org.apache.lucene.sandbox.queries.regex.JakartaRegexpCapabilities \
--exclude org.apache.regexp.RegexpTunnel \
--python lucene \
--mapping org.apache.lucene.document.Document 'get:(Ljava/lang/String;)Ljava/lang/String;' \
--mapping java.util.Properties 'getProperty:(Ljava/lang/String;)Ljava/lang/String;' \
--sequence java.util.AbstractList 'size:()I' 'get:(I)Ljava/lang/Object;' \
org.apache.lucene.index.IndexWriter:getReader \
--version $(LUCENE_VER) \
--module python/collections.py \
--module python/ICUNormalizer2Filter.py \
--module python/ICUFoldingFilter.py \
--module python/ICUTransformFilter.py \
$(RESOURCES) \
--files $(NUM_FILES)
generate: jars
$(GENERATE)
compile: jars
$(GENERATE) --build $(DEBUG_OPT)
install: jars
$(GENERATE) --install $(DEBUG_OPT) $(INSTALL_OPT)
bdist: jars
$(GENERATE) --bdist
wininst: jars
$(GENERATE) --wininst
all: sources ivy jars resources compile
@echo build of pylucene $(VERSION) complete
clean:
if test -f $(LUCENE)/build.xml; then cd $(LUCENE); $(ANT) clean; fi
rm -rf $(LUCENE)/build build
realclean:
if test ! -d $(LUCENE_SRC)/.svn; then rm -rf $(LUCENE_SRC) lucene.egg-info; else rm -rf $(LUCENE)/build; fi
rm -rf build
OS=$(shell uname)
BUILD_TEST:=$(PYLUCENE)/build/test
ifeq ($(findstring CYGWIN,$(OS)),CYGWIN)
BUILD_TEST:=`cygpath -aw $(BUILD_TEST)`
else
ifeq ($(findstring MINGW,$(OS)),MINGW)
BUILD_TEST:=`$(PYTHON) -c "import os, sys; print os.path.normpath(sys.argv[1]).replace(chr(92), chr(92)*2)" $(BUILD_TEST)`
endif
endif
install-test:
mkdir -p $(BUILD_TEST)
PYTHONPATH=$(BUILD_TEST) $(GENERATE) --install $(DEBUG_OPT) --install-dir $(BUILD_TEST)
test: install-test
find test -name 'test_*.py' | PYTHONPATH=$(BUILD_TEST) xargs -t -n 1 $(PYTHON)
ARCHIVE=pylucene-$(VERSION)-src.tar.gz
distrib:
mkdir -p distrib
svn export --force . distrib/pylucene-$(VERSION)
tar -cf - --exclude build $(LUCENE_SRC) | tar -C distrib/pylucene-$(VERSION) -xvf -
cd distrib; tar --disable-copyfile -cvzf $(ARCHIVE) pylucene-$(VERSION)
cd distrib; gpg2 --armor --output $(ARCHIVE).asc --detach-sig $(ARCHIVE)
cd distrib; md5sum $(ARCHIVE) > $(ARCHIVE).md5
stage:
cd distrib; scp -p $(ARCHIVE) $(ARCHIVE).asc $(ARCHIVE).md5 \
people.apache.org:public_html/staging_area
release:
cd distrib; cp -p $(ARCHIVE) $(ARCHIVE).asc $(ARCHIVE).md5 ../../dist/pylucene/
print-%:
@echo $* = $($*)
pylucene-4.10.1-1/NOTICE

Apache PyLucene
Copyright 2009-2013 The Apache Software Foundation
Copyright (c) 2004-2008 Open Source Applications Foundation
This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).
pylucene-4.10.1-1/python/

pylucene-4.10.1-1/README

Please see http://lucene.apache.org/pylucene/features.html
pylucene-4.10.1-1/samples/

pylucene-4.10.1-1/test/

pylucene-4.10.1-1/test/BaseTestRangeFilter.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import lucene # so as to get 'org'
from random import seed, randint
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.document import Document, Field, StringField
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import Version
class BaseTestRangeFilter(PyLuceneTestCase):
def __init__(self, *args):
super(BaseTestRangeFilter, self).__init__(*args)
#
# Collation interacts badly with hyphens -- collation produces
# different ordering than Unicode code-point ordering -- so two
# indexes are created: one which can't have negative random
# integers, for testing collated ranges, and the other which can
# have negative random integers, for all other tests.
#
self.MAX_INT = 0x7fffffff
class TestIndex(object):
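            # '_self' keeps the enclosing test case's 'self' visible
            # inside this nested helper class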
def __init__(_self, minR, maxR, allowNegativeRandomInts):
_self.minR = minR
_self.maxR = maxR
_self.allowNegativeRandomInts = allowNegativeRandomInts
_self.index = RAMDirectory()
self.signedIndex = TestIndex(self.MAX_INT, ~self.MAX_INT, True)
self.unsignedIndex = TestIndex(self.MAX_INT, 0, False)
self.minId = 0
self.maxId = 10000
self.build(self.signedIndex)
self.build(self.unsignedIndex)
#
# a simple padding function that should work with any int
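    # (negative values are folded into a "-"-prefixed complement range so
    # the padded strings sort lexicographically in numeric order, e.g.
    # pad(-2) == "-2147483646" < pad(-1) == "-2147483647" < pad(0) == "00000000000")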
#
def pad(self, n):
if n < 0:
return "-%0.10d" % (self.MAX_INT + n + 1)
else:
return "0%0.10d" % n
def build(self, index):
writer = self.getWriter(directory=index.index,
analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT))
seed(101)
for d in xrange(self.minId, self.maxId + 1):
doc = Document()
doc.add(Field("id", self.pad(d), StringField.TYPE_STORED))
if index.allowNegativeRandomInts:
r = randint(~self.MAX_INT, self.MAX_INT)
else:
r = randint(0, self.MAX_INT)
if index.maxR < r:
index.maxR = r
if r < index.minR:
index.minR = r
doc.add(Field("rand", self.pad(r), StringField.TYPE_STORED))
doc.add(Field("body", "body", StringField.TYPE_STORED))
writer.addDocument(doc)
writer.commit()
writer.close()
def testPad(self):
tests = [-9999999, -99560, -100, -3, -1, 0, 3, 9, 10, 1000, 999999999]
for i in xrange(0, len(tests) - 1):
a = tests[i]
b = tests[i + 1]
aa = self.pad(a)
bb = self.pad(b)
label = "%s:%s vs %s:%s" %(a, aa, b, bb)
self.assertEqual(len(aa), len(bb), "length of %s" %label)
self.assert_(aa < bb, "compare less than %s" %label)
pylucene-4.10.1-1/test/BaseTokenStreamTestCase.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
from unittest import TestCase, main
from lucene import JArray
from java.io import StringReader
from java.lang import Boolean
from org.apache.lucene.analysis.tokenattributes import \
OffsetAttribute, CharTermAttribute, TypeAttribute, \
PositionIncrementAttribute
from org.apache.pylucene.util import PythonAttributeImpl
class BaseTokenStreamTestCase(TestCase):
"""
some helpers to test Analyzers and TokenStreams
"""
class CheckClearAttributesAttributeImpl(PythonAttributeImpl):
def __init__(_self):
super(PythonAttributeImpl, _self).__init__()
_self.clearCalled = False
def getAndResetClearCalled(_self):
try:
return _self.clearCalled
finally:
_self.clearCalled = False
def clear(_self):
_self.clearCalled = True
def equals(_self, other):
return (
CheckClearAttributesAttributeImpl.instance_(other) and
CheckClearAttributesAttributeImpl.cast_(other).clearCalled ==
_self.clearCalled)
def hashCode(_self):
return 76137213 ^ Boolean.valueOf(_self.clearCalled).hashCode()
def copyTo(_self, target):
CheckClearAttributesAttributeImpl.cast_(target).clear()
def _assertTokenStreamContents(self, ts, output,
startOffsets=None, endOffsets=None,
types=None, posIncrements=None,
finalOffset=None):
#checkClearAtt = ts.addAttribute(PythonAttribute.class_);
self.assert_(output is not None)
self.assert_(ts.hasAttribute(CharTermAttribute.class_),
"has no CharTermAttribute")
termAtt = ts.getAttribute(CharTermAttribute.class_)
offsetAtt = None
if (startOffsets is not None or
endOffsets is not None or
finalOffset is not None):
self.assert_(ts.hasAttribute(OffsetAttribute.class_),
"has no OffsetAttribute")
offsetAtt = ts.getAttribute(OffsetAttribute.class_)
typeAtt = None
if types is not None:
self.assert_(ts.hasAttribute(TypeAttribute.class_),
"has no TypeAttribute")
typeAtt = ts.getAttribute(TypeAttribute.class_)
posIncrAtt = None
if posIncrements is not None:
self.assert_(ts.hasAttribute(PositionIncrementAttribute.class_),
"has no PositionIncrementAttribute")
posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class_)
ts.reset()
for i in xrange(len(output)):
# extra safety to enforce, that the state is not preserved and
# also assign bogus values
ts.clearAttributes()
termAtt.setEmpty().append("bogusTerm")
if offsetAtt is not None:
offsetAtt.setOffset(14584724, 24683243)
if typeAtt is not None:
typeAtt.setType("bogusType")
if posIncrAtt is not None:
posIncrAtt.setPositionIncrement(45987657)
self.assert_(ts.incrementToken(), "token %d exists" %(i))
self.assertEqual(output[i], termAtt.toString(), "term %d" %(i))
if startOffsets is not None:
self.assertEqual(startOffsets[i], offsetAtt.startOffset(),
"startOffset %d" %(i))
if endOffsets is not None:
self.assertEqual(endOffsets[i], offsetAtt.endOffset(),
"endOffset %d" %(i))
if types is not None:
self.assertEqual(types[i], typeAtt.type(), "type %d" %(i))
if posIncrements is not None:
self.assertEqual(posIncrements[i],
posIncrAtt.getPositionIncrement(),
"posIncrement %d" %(i))
self.assert_(not ts.incrementToken(), "end of stream")
ts.end()
ts.close()
def _assertAnalyzesTo(self, a, input, output,
startOffsets=None, endOffsets=None,
types=None, posIncrements=None):
ts = a.tokenStream("dummy", StringReader(input))
self._assertTokenStreamContents(ts, output, startOffsets, endOffsets,
types, posIncrements)
def _assertAnalyzesToReuse(self, a, input, output,
startOffsets=None, endOffsets=None,
types=None, posIncrements=None):
ts = a.reusableTokenStream("dummy", StringReader(input))
self._assertTokenStreamContents(ts, output, startOffsets, endOffsets,
types, posIncrements)
# simple utility method for testing stemmers
def _checkOneTerm(self, a, input, expected):
self._assertAnalyzesTo(a, input, JArray('string')(expected))
def _checkOneTermReuse(self, a, input, expected):
self._assertAnalyzesToReuse(a, input, JArray('string')(expected))
pylucene-4.10.1-1/test/MultiSpansWrapper.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene # so that 'org' is found
from java.util import Collections, HashMap, TreeSet
from org.apache.lucene.index import Term, TermContext, ReaderUtil
from org.apache.lucene.search import DocIdSetIterator
from org.apache.lucene.search.spans import SpanQuery
from org.apache.pylucene.search.spans import PythonSpans
class MultiSpansWrapper(PythonSpans):
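    """
    Steps a SpanQuery's per-segment Spans across all leaves of a
    composite reader, rebasing doc() by each leaf's docBase.
    """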
def __init__(self, leaves, query, termContexts):
super(MultiSpansWrapper, self).__init__()
self.leaves = leaves
self.numLeaves = leaves.size()
self.query = query
self.termContexts = termContexts
self.leafOrd = 0
self.current = None
@classmethod
def wrap(cls, topLevelReaderContext, query):
termContexts = HashMap()
terms = TreeSet()
query.extractTerms(terms)
for term in terms:
termContexts.put(term, TermContext.build(topLevelReaderContext, term))
leaves = topLevelReaderContext.leaves()
if leaves.size() == 1:
ctx = leaves.get(0)
return query.getSpans(ctx, ctx.reader().getLiveDocs(), termContexts)
return MultiSpansWrapper(leaves, query, termContexts)
def next(self):
if self.leafOrd >= self.numLeaves:
return False
if self.current is None:
ctx = self.leaves.get(self.leafOrd)
self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(),
self.termContexts)
while True:
if self.current.next():
return True
self.leafOrd += 1
if self.leafOrd < self.numLeaves:
ctx = self.leaves.get(self.leafOrd)
self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(), self.termContexts)
else:
self.current = None
break
return False
def skipTo(self, target):
if self.leafOrd >= self.numLeaves:
return False
subIndex = ReaderUtil.subIndex(target, self.leaves)
assert subIndex >= self.leafOrd
if subIndex != self.leafOrd:
ctx = self.leaves.get(subIndex)
self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(),
self.termContexts)
self.leafOrd = subIndex
elif self.current is None:
ctx = self.leaves.get(self.leafOrd)
self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(),
self.termContexts)
while True:
if self.current.skipTo(target - self.leaves.get(self.leafOrd).docBase):
return True
self.leafOrd += 1
if self.leafOrd < self.numLeaves:
ctx = self.leaves.get(self.leafOrd)
self.current = self.query.getSpans(ctx, ctx.reader().getLiveDocs(), self.termContexts)
else:
self.current = None
break
return False
def doc(self):
if self.current is None:
return DocIdSetIterator.NO_MORE_DOCS
return self.current.doc() + self.leaves.get(self.leafOrd).docBase
def start(self):
if self.current is None:
return DocIdSetIterator.NO_MORE_DOCS
return self.current.start()
def end(self):
if self.current is None:
return DocIdSetIterator.NO_MORE_DOCS
return self.current.end()
def getPayload(self):
if self.current is None:
return Collections.emptyList()
return self.current.getPayload()
def isPayloadAvailable(self):
if self.current is None:
return False
return self.current.isPayloadAvailable()
def cost(self):
return sys.maxint
pylucene-4.10.1-1/test/PyLuceneTestCase.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import lucene # so that 'org' is found
from unittest import TestCase
from java.io import File
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.document import Field
from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
from org.apache.lucene.index import \
IndexWriter, IndexWriterConfig, DirectoryReader
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import Version
class PyLuceneTestCase(TestCase):
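    """
    Base class for these unit tests: provides a fresh RAMDirectory per
    test plus helpers for creating writers, readers and searchers on it.
    """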
def __init__(self, *args):
super(PyLuceneTestCase, self).__init__(*args)
self.TEST_VERSION = Version.LUCENE_CURRENT
def setUp(self):
self.directory = RAMDirectory()
def tearDown(self):
self.directory.close()
def getConfig(self, analyzer=None):
return IndexWriterConfig(self.TEST_VERSION, analyzer)
def getWriter(self, directory=None, analyzer=None, open_mode=None,
similarity=None, maxBufferedDocs=None, mergePolicy=None):
if analyzer is None:
analyzer = LimitTokenCountAnalyzer(WhitespaceAnalyzer(self.TEST_VERSION), 10000)
config = self.getConfig(analyzer)
if open_mode is None:
open_mode = IndexWriterConfig.OpenMode.CREATE
config.setOpenMode(open_mode)
if similarity is not None:
config.setSimilarity(similarity)
if maxBufferedDocs is not None:
config.setMaxBufferedDocs(maxBufferedDocs)
if mergePolicy is not None:
config.setMergePolicy(mergePolicy)
if directory is None:
directory = self.directory
return IndexWriter(directory, config)
def getSearcher(self, directory=None, reader=None):
if reader is not None:
return IndexSearcher(reader)
return IndexSearcher(self.getReader(directory=directory))
def getReader(self, directory=None):
if directory is None:
directory = self.directory
return DirectoryReader.open(directory)
pylucene-4.10.1-1/test/test_Analyzers.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from BaseTokenStreamTestCase import BaseTokenStreamTestCase
from lucene import JArray
from java.io import StringReader
from org.apache.lucene.analysis.core import \
SimpleAnalyzer, WhitespaceAnalyzer, StopAnalyzer, WhitespaceTokenizer
from org.apache.lucene.analysis.tokenattributes import PayloadAttribute
from org.apache.lucene.util import Version, BytesRef
from org.apache.pylucene.analysis import PythonTokenFilter
class AnalyzersTestCase(BaseTokenStreamTestCase):
"""
Unit tests ported from Java Lucene
"""
def testSimple(self):
a = SimpleAnalyzer(Version.LUCENE_CURRENT)
self._assertAnalyzesTo(a, "foo bar FOO BAR",
[ "foo", "bar", "foo", "bar" ])
self._assertAnalyzesTo(a, "foo bar . FOO <> BAR",
[ "foo", "bar", "foo", "bar" ])
self._assertAnalyzesTo(a, "foo.bar.FOO.BAR",
[ "foo", "bar", "foo", "bar" ])
self._assertAnalyzesTo(a, "U.S.A.",
[ "u", "s", "a" ])
self._assertAnalyzesTo(a, "C++",
[ "c" ])
self._assertAnalyzesTo(a, "B2B",
[ "b", "b" ])
self._assertAnalyzesTo(a, "2B",
[ "b" ])
self._assertAnalyzesTo(a, "\"QUOTED\" word",
[ "quoted", "word" ])
def testNull(self):
a = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
self._assertAnalyzesTo(a, "foo bar FOO BAR",
[ "foo", "bar", "FOO", "BAR" ])
self._assertAnalyzesTo(a, "foo bar . FOO <> BAR",
[ "foo", "bar", ".", "FOO", "<>", "BAR" ])
self._assertAnalyzesTo(a, "foo.bar.FOO.BAR",
[ "foo.bar.FOO.BAR" ])
self._assertAnalyzesTo(a, "U.S.A.",
[ "U.S.A." ])
self._assertAnalyzesTo(a, "C++",
[ "C++" ])
self._assertAnalyzesTo(a, "B2B",
[ "B2B" ])
self._assertAnalyzesTo(a, "2B",
[ "2B" ])
self._assertAnalyzesTo(a, "\"QUOTED\" word",
[ "\"QUOTED\"", "word" ])
def testStop(self):
a = StopAnalyzer(Version.LUCENE_CURRENT)
self._assertAnalyzesTo(a, "foo bar FOO BAR",
[ "foo", "bar", "foo", "bar" ])
self._assertAnalyzesTo(a, "foo a bar such FOO THESE BAR",
[ "foo", "bar", "foo", "bar" ])
def _verifyPayload(self, ts):
ts.reset()
payloadAtt = ts.getAttribute(PayloadAttribute.class_)
b = 0
while True:
b += 1
if not ts.incrementToken():
break
self.assertEqual(b, payloadAtt.getPayload().bytes[0])
# Make sure old style next() calls result in a new copy of payloads
def testPayloadCopy(self):
s = "how now brown cow"
ts = WhitespaceTokenizer(Version.LUCENE_CURRENT, StringReader(s))
ts = PayloadSetter(ts)
self._verifyPayload(ts)
ts = WhitespaceTokenizer(Version.LUCENE_CURRENT, StringReader(s))
ts = PayloadSetter(ts)
self._verifyPayload(ts)
class PayloadSetter(PythonTokenFilter):
def __init__(self, input):
super(PayloadSetter, self).__init__(input)
self.input = input
self.payloadAtt = self.addAttribute(PayloadAttribute.class_)
self.data = JArray('byte')(1)
self.p = BytesRef(self.data, 0, 1)
def incrementToken(self):
if not self.input.incrementToken():
return False
self.payloadAtt.setPayload(self.p)
self.data[0] += 1
return True
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_Binary.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
# test PyLucene binary field
import sys, lucene, unittest
from lucene import JArray
from org.apache.lucene.document import StoredField
class BinaryTestCase(unittest.TestCase):
def binary(self, b):
c = JArray('byte')(b)
field = StoredField("bin", c)
v = field.binaryValue().bytes
assert c == v and b == [a for a in v]
def testBinary(self):
self.binary([66, 90, 104, 57, 49, 65, 89, 38,
83, 89, 105, 56, 95, 75, 0, 0, 14, -41, -128])
self.binary([])
self.binary([0, 0, 0])
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_BinaryDocument.py

# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from lucene import JArray
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.document import \
Document, StoredField, CompressionTools, Field, FieldType
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import IndexWriter
from org.apache.lucene.util import Version
class TestBinaryDocument(PyLuceneTestCase):
binaryValStored = "this text will be stored as a byte array in the index"
binaryValCompressed = "this text will be also stored and compressed as a byte array in the index"
def testBinaryFieldInIndex(self):
ft = FieldType()
ft.setStored(True)
bytes = JArray('byte')(self.binaryValStored)
binaryFldStored = StoredField("binaryStored", bytes)
stringFldStored = Field("stringStored", self.binaryValStored, ft)
doc = Document()
doc.add(binaryFldStored)
doc.add(stringFldStored)
# test for field count
self.assertEqual(2, doc.fields.size())
# add the doc to a ram index
writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
writer.addDocument(doc)
writer.close()
# open a reader and fetch the document
reader = self.getReader()
docFromReader = reader.document(0)
self.assert_(docFromReader is not None)
# fetch the binary stored field and compare its content with the
# original one
bytes = docFromReader.getBinaryValue("binaryStored")
binaryFldStoredTest = bytes.bytes.string_
self.assertEqual(binaryFldStoredTest, self.binaryValStored)
# fetch the string field and compare its content with the original
# one
stringFldStoredTest = docFromReader.get("stringStored")
self.assertEqual(stringFldStoredTest, self.binaryValStored)
reader.close()
def testCompressionTools(self):
bytes = JArray('byte')(self.binaryValCompressed)
binaryFldCompressed = StoredField("binaryCompressed", CompressionTools.compress(bytes))
stringFldCompressed = StoredField("stringCompressed", CompressionTools.compressString(self.binaryValCompressed))
doc = Document()
doc.add(binaryFldCompressed)
doc.add(stringFldCompressed)
# add the doc to a ram index
writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
writer.addDocument(doc)
writer.close()
# open a reader and fetch the document
reader = self.getReader()
docFromReader = reader.document(0)
self.assert_(docFromReader is not None)
        # fetch the binary compressed field and compare its content with
# the original one
bytes = CompressionTools.decompress(docFromReader.getBinaryValue("binaryCompressed"))
binaryFldCompressedTest = bytes.string_
self.assertEqual(binaryFldCompressedTest, self.binaryValCompressed)
self.assertEqual(CompressionTools.decompressString(docFromReader.getBinaryValue("stringCompressed")), self.binaryValCompressed)
reader.close()
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_BooleanOr.py 000644 000765 000000 00000011151 12162654000 020245 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import Term
from org.apache.lucene.search import BooleanClause, BooleanQuery, TermQuery
from org.apache.lucene.util import Version
class BooleanOrTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def __init__(self, *args):
super(BooleanOrTestCase, self).__init__(*args)
self.FIELD_T = "T"
self.FIELD_C = "C"
self.t1 = TermQuery(Term(self.FIELD_T, "files"))
self.t2 = TermQuery(Term(self.FIELD_T, "deleting"))
self.c1 = TermQuery(Term(self.FIELD_C, "production"))
self.c2 = TermQuery(Term(self.FIELD_C, "optimize"))
self.searcher = None
def setUp(self):
super(BooleanOrTestCase, self).setUp()
# add the doc to a ram index
writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
d = Document()
d.add(Field(self.FIELD_T, "Optimize not deleting all files",
TextField.TYPE_STORED))
d.add(Field(self.FIELD_C,
"Deleted When I run an optimize in our production environment.",
TextField.TYPE_STORED))
writer.addDocument(d)
writer.close()
self.searcher = self.getSearcher()
def search(self, q):
return self.searcher.search(q, 50).totalHits
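    # A BooleanQuery built only from SHOULD clauses matches any document
    # satisfying at least one clause, so the single document added in setUp
    # is expected to match every combination tested below.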
def testElements(self):
self.assertEqual(1, self.search(self.t1))
self.assertEqual(1, self.search(self.t2))
self.assertEqual(1, self.search(self.c1))
self.assertEqual(1, self.search(self.c2))
def testFlat(self):
q = BooleanQuery()
q.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD))
q.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD))
q.add(BooleanClause(self.c1, BooleanClause.Occur.SHOULD))
q.add(BooleanClause(self.c2, BooleanClause.Occur.SHOULD))
self.assertEqual(1, self.search(q))
def testParenthesisMust(self):
q3 = BooleanQuery()
q3.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD))
q3.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD))
q4 = BooleanQuery()
q4.add(BooleanClause(self.c1, BooleanClause.Occur.MUST))
q4.add(BooleanClause(self.c2, BooleanClause.Occur.MUST))
q2 = BooleanQuery()
q2.add(q3, BooleanClause.Occur.SHOULD)
q2.add(q4, BooleanClause.Occur.SHOULD)
self.assertEqual(1, self.search(q2))
def testParenthesisMust2(self):
q3 = BooleanQuery()
q3.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD))
q3.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD))
q4 = BooleanQuery()
q4.add(BooleanClause(self.c1, BooleanClause.Occur.SHOULD))
q4.add(BooleanClause(self.c2, BooleanClause.Occur.SHOULD))
q2 = BooleanQuery()
q2.add(q3, BooleanClause.Occur.SHOULD)
q2.add(q4, BooleanClause.Occur.MUST)
self.assertEqual(1, self.search(q2))
def testParenthesisShould(self):
q3 = BooleanQuery()
q3.add(BooleanClause(self.t1, BooleanClause.Occur.SHOULD))
q3.add(BooleanClause(self.t2, BooleanClause.Occur.SHOULD))
q4 = BooleanQuery()
q4.add(BooleanClause(self.c1, BooleanClause.Occur.SHOULD))
q4.add(BooleanClause(self.c2, BooleanClause.Occur.SHOULD))
q2 = BooleanQuery()
q2.add(q3, BooleanClause.Occur.SHOULD)
q2.add(q4, BooleanClause.Occur.SHOULD)
self.assertEqual(1, self.search(q2))
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_BooleanQuery.py 000644 000765 000000 00000004345 12162654000 021001 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.index import Term
from org.apache.lucene.search import BooleanClause, BooleanQuery, TermQuery
class TestBooleanQuery(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def testEquality(self):
bq1 = BooleanQuery()
bq1.add(TermQuery(Term("field", "value1")), BooleanClause.Occur.SHOULD)
bq1.add(TermQuery(Term("field", "value2")), BooleanClause.Occur.SHOULD)
nested1 = BooleanQuery()
nested1.add(TermQuery(Term("field", "nestedvalue1")), BooleanClause.Occur.SHOULD)
nested1.add(TermQuery(Term("field", "nestedvalue2")), BooleanClause.Occur.SHOULD)
bq1.add(nested1, BooleanClause.Occur.SHOULD)
bq2 = BooleanQuery()
bq2.add(TermQuery(Term("field", "value1")), BooleanClause.Occur.SHOULD)
bq2.add(TermQuery(Term("field", "value2")), BooleanClause.Occur.SHOULD)
nested2 = BooleanQuery()
nested2.add(TermQuery(Term("field", "nestedvalue1")), BooleanClause.Occur.SHOULD)
nested2.add(TermQuery(Term("field", "nestedvalue2")), BooleanClause.Occur.SHOULD)
bq2.add(nested2, BooleanClause.Occur.SHOULD)
self.assert_(bq1.equals(bq2))
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_bug1564.py 000644 000765 000000 00000004041 12162654000 017462 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, StoredField, TextField
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.util import Version
class Test_Bug1564(PyLuceneTestCase):
def setUp(self):
super(Test_Bug1564, self).setUp()
self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
writer = self.getWriter(analyzer=self.analyzer)
doc = Document()
doc.add(Field('all', u'windowpane beplaster rapacious \
catatonia gauntlet wynn depressible swede pick dressmake supreme \
jeremy plumb theoretic bureaucracy causation chartres equipoise \
dispersible careen heard', TextField.TYPE_NOT_STORED))
doc.add(Field('id', '1', StoredField.TYPE))
writer.addDocument(doc)
writer.commit()
writer.close()
def test_bug1564(self):
searcher = self.getSearcher()
query = QueryParser(Version.LUCENE_CURRENT, 'all',
self.analyzer).parse('supreme')
topDocs = searcher.search(query, 50)
self.assertEqual(topDocs.totalHits, 1)
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
unittest.main()
pylucene-4.10.1-1/test/test_bug1763.py 000644 000765 000000 00000004730 12162654000 017470 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, StoredField, TextField
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import Version
class Test_Bug1763(PyLuceneTestCase):
def setUp(self):
super(Test_Bug1763, self).setUp()
self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
self.d1 = RAMDirectory()
self.d2 = RAMDirectory()
w1, w2 = [self.getWriter(directory=d, analyzer=self.analyzer)
for d in [self.d1, self.d2]]
doc1 = Document()
doc2 = Document()
doc1.add(Field("all", "blah blah double blah Gesundheit",
TextField.TYPE_NOT_STORED))
doc1.add(Field('id', '1', StoredField.TYPE))
doc2.add(Field("all", "a quick brown test ran over the lazy data",
TextField.TYPE_NOT_STORED))
doc2.add(Field('id', '2', StoredField.TYPE))
w1.addDocument(doc1)
w2.addDocument(doc2)
for w in [w1, w2]:
w.close()
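    # addIndexes() merges the segments read from d2 into d1, so a search on
    # d1 afterwards should find the document that was indexed into d2.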
def test_bug1763(self):
w1 = self.getWriter(directory=self.d1, analyzer=self.analyzer)
w1.addIndexes([self.getReader(directory=self.d2)])
w1.close()
searcher = self.getSearcher(self.d1)
q = QueryParser(Version.LUCENE_CURRENT, 'all',
self.analyzer).parse('brown')
topDocs = searcher.search(q, 50)
self.assertEqual(searcher.doc(topDocs.scoreDocs[0].doc).get('id'), '2')
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
unittest.main()
pylucene-4.10.1-1/test/test_bug1842.py 000644 000765 000000 00000004453 12162654000 017470 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, FieldType, StringField
from org.apache.lucene.index import Term
from org.apache.lucene.search import TermQuery
from org.apache.lucene.util import BytesRefIterator, Version
class Test_Bug1842(PyLuceneTestCase):
def setUp(self):
super(Test_Bug1842, self).setUp()
self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
w1 = self.getWriter(analyzer=self.analyzer)
doc1 = Document()
ftype = FieldType()
ftype.setStored(False)
ftype.setIndexed(True)
ftype.setStoreTermVectors(True)
doc1.add(Field("all", "blah blah blah Gesundheit", ftype))
doc1.add(Field('id', '1', StringField.TYPE_NOT_STORED))
w1.addDocument(doc1)
w1.close()
def test_bug1842(self):
reader = self.getReader()
searcher = self.getSearcher()
q = TermQuery(Term("id", '1'))
topDocs = searcher.search(q, 50)
termvec = reader.getTermVector(topDocs.scoreDocs[0].doc, "all")
terms = []
freqs = []
termsEnum = termvec.iterator(None)
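        # cast_() is JCC's explicit downcast: it re-wraps the TermsEnum as
        # its BytesRefIterator interface so the Python for-loop can iterate
        # over the term bytes.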
for term in BytesRefIterator.cast_(termsEnum):
terms.append(term.utf8ToString())
freqs.append(termsEnum.totalTermFreq())
terms.sort()
self.assert_(terms == ['blah', 'gesundheit'])
self.assert_(freqs == [3, 1])
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
unittest.main()
pylucene-4.10.1-1/test/test_CachingWrapperFilter.py 000644 000765 000000 00000005252 12162654000 022435 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import \
AtomicReaderContext, SlowCompositeReaderWrapper
from org.apache.lucene.search import CachingWrapperFilter
from org.apache.lucene.util import Version, FixedBitSet
from org.apache.pylucene.search import PythonFilter
class CachingWrapperFilterTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def testCachingWorks(self):
writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
writer.close()
reader = SlowCompositeReaderWrapper.wrap(self.getReader())
context = AtomicReaderContext.cast_(reader.getContext())
class mockFilter(PythonFilter):
def __init__(self):
super(mockFilter, self).__init__()
self._wasCalled = False
def getDocIdSet(self, context, acceptDocs):
                self._wasCalled = True
return FixedBitSet(context.reader().maxDoc())
def clear(self):
self._wasCalled = False
def wasCalled(self):
return self._wasCalled
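        # CachingWrapperFilter caches the DocIdSet it obtains per reader, so
        # the wrapped filter should be consulted only on the first call.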
filter = mockFilter()
cacher = CachingWrapperFilter(filter)
# first time, nested filter is called
strongRef = cacher.getDocIdSet(context, context.reader().getLiveDocs())
self.assert_(filter.wasCalled(), "first time")
# second time, nested filter should not be called
filter.clear()
cacher.getDocIdSet(context, context.reader().getLiveDocs())
self.assert_(not filter.wasCalled(), "second time")
reader.close()
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_Collections.py 000644 000765 000000 00000021723 12162654000 020651 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from lucene.collections import JavaSet, JavaList
from java.lang import Class, Boolean, Integer, Long, Double, String
from java.util import ArrayList, HashSet
class Test_CollectionsSetBase(unittest.TestCase):
"""base test case for JavaSet (uses integers)
subclass may redefine method 'createTestSet'
"""
def createTestSet(self):
"""creates the test set for this test case
"""
return set(range(9))
def setUp(self):
self.testSet = self.createTestSet()
self.javaSet = JavaSet(self.testSet)
# print "created testSet: %s JavaSet %s" % (self.testSet,self.javaSet)
def tearDown(self):
del self.testSet
del self.javaSet
def test_Contains(self):
elem0 = list(self.testSet)[0]
self.assertTrue(self.javaSet.contains(elem0))
def test_Size(self):
self.assertEqual(len(self.testSet), self.javaSet.size())
def test_Add(self):
"""must fail to add an existing element
"""
elem0 = list(self.testSet)[0]
self.assertFalse(self.javaSet.add(elem0))
self.assertEqual(len(self.testSet),
self.javaSet.size(),
"size has not changed")
def test_HashSet(self):
"""create HashSet in JVM (from the JavaSet)
"""
hashSet = HashSet(self.javaSet)
# print "created HashSet:", hashSet, type(hashSet)
self.assertEqual(self.javaSet.size(),
hashSet.size(),
"HashSet has same size")
elem0 = list(self.testSet)[0]
self.assertTrue(hashSet.contains(elem0))
def test_JArray(self):
"""create JArray in JVM (from the JavaSet)
"""
jArray = self.javaSet.toArray()
# print "created JArray:", jArray, type(jArray)
self.assertEqual(self.javaSet.size(),len(jArray),
"JArray has same size")
elem0 = jArray[0]
elem1 = jArray[1]
# print "JArray: first element: %s (%s)" % (elem0,type(elem0))
# print "JArray: second element: %s (%s)"% (elem1,type(elem1))
def test_ArrayList(self):
"""create ArrayList in JVM (from the JavaSet)
"""
arrayList = ArrayList(self.javaSet)
# print "created ArrayList:", arrayList, type(arrayList)
self.assertEqual(self.javaSet.size(), arrayList.size(),
"ArrayList has same size")
elem0 = arrayList.get(0)
elem1 = arrayList.get(1)
# print "ArrayList: first element: %s (%s) indexOf=%d" % (elem0,type(elem0), arrayList.indexOf(elem0))
# print "ArrayList: second element: %s (%s) indexOf=%d" % (elem1,type(elem1), arrayList.indexOf(elem1))
self.assertFalse(elem0.equals(elem1),
"ArrayList: first element must NOT equal second element")
self.assertNotEqual(elem0, elem1,
"ArrayList: first element must NOT equal second element")
class Test_CollectionsStringSet(Test_CollectionsSetBase):
def createTestSet(self):
return set(['a','b','c'])
class Test_CollectionsFloatSet(Test_CollectionsSetBase):
def createTestSet(self):
return set([1.5, 4.5, -0.5])
class Test_CollectionsBoolSet(Test_CollectionsSetBase):
def createTestSet(self):
return set([True,False])
class Test_CollectionsListBase(unittest.TestCase):
"""base test case for JavaList (uses integers)
subclass may redefine method 'createTestList'
"""
def __init__(self, *args, **kwds):
unittest.TestCase.__init__(self, *args, **kwds)
self._primitive_types = {
Class.forName('java.lang.Boolean'): Boolean,
Class.forName('java.lang.Integer'): Integer,
Class.forName('java.lang.Long'): Long,
Class.forName('java.lang.Double'): Double,
Class.forName('java.lang.String'): String
}
def createTestList(self):
"""creates the test list for this test case
"""
return range(9)
def setUp(self):
self.testList = self.createTestList()
self.javaList = JavaList(self.testList)
# print "created testList: %s JavaList %s" % (self.testList,self.javaList)
def tearDown(self):
del self.testList
del self.javaList
def test_Contains(self):
elem0 = self.testList[0]
self.assertTrue(self.javaList.contains(elem0))
def test_Size(self):
self.assertEqual(len(self.testList), self.javaList.size())
def test_Pos(self):
"""elements must have same position
"""
elem0 = self.testList[0]
elem1 = self.testList[1]
pos0 = self.javaList.indexOf(elem0)
pos1 = self.javaList.indexOf(elem1)
self.assertEqual(pos0, 0, "indexOf first element")
self.assertEqual(pos1, 1, "indexOf second element")
def test_HashSet(self):
"""create HashSet in JVM (from the JavaSet)
"""
hashSet = HashSet(self.javaList)
# print "created HashSet:", hashSet, type(hashSet)
self.assertEqual(self.javaList.size(),
hashSet.size(),
"HashSet has same size")
elem0 = self.testList[0]
self.assertTrue(hashSet.contains(elem0))
def test_JArray(self):
"""create JArray in JVM (from the JavaSet)
"""
jArray = self.javaList.toArray()
# print "created JArray:", jArray, type(jArray)
self.assertEqual(self.javaList.size(),len(jArray),
"JArray has same size")
elem0 = jArray[0]
elem1 = jArray[1]
listElem0 = self.testList[0]
listElem1 = self.testList[1]
self.assertEqual(elem0, listElem0,
"should be equal: %s (%s) <-> %s (%s)" % (
elem0,type(elem0), listElem0, type(listElem0)))
self.assertEqual(elem1, listElem1,
"should be equal: %s (%s) <-> %s (%s)" % (
elem1,type(elem1), listElem1, type(listElem1)))
self.assertEqual(type(elem0), type(listElem0),
"should have same type: %s <-> %s" % (
type(elem0), type(listElem0)))
self.assertNotEqual(elem0, elem1,
"JArray: first element must NOT equal second element")
def test_ArrayList(self):
"""create ArrayList in JVM (from the JavaSet)
"""
arrayList = ArrayList(self.javaList)
# print "created ArrayList:", arrayList, type(arrayList)
self.assertEqual(self.javaList.size(), arrayList.size(),
"ArrayList has same size")
elem0 = arrayList.get(0)
elem1 = arrayList.get(1)
self.assertEqual(0, arrayList.indexOf(elem0), "same index position")
self.assertEqual(1, arrayList.indexOf(elem1), "same index position")
listElem0 = self.testList[0]
listElem1 = self.testList[1]
_type = self._primitive_types.get(elem0.getClass())
if _type is not None:
elem0 = _type.class_.cast(elem0)
elem1 = _type.class_.cast(elem1)
self.assertEqual(elem0, listElem0,
"should be equal: %s (%s) <-> %s (%s)" % (
elem0, type(elem0), listElem0, type(listElem0)))
self.assertEqual(elem1, listElem1,
"should be equal: %s (%s) <-> %s (%s)" % (
elem1, type(elem1), listElem1, type(listElem1)))
self.assertEqual(type(elem0), type(listElem0),
"should have same type: %s <-> %s" % (
type(elem0), type(listElem0)))
self.assertNotEqual(elem0, elem1,
"ArrayList: first element must NOT equal second element")
class Test_CollectionsStringList(Test_CollectionsListBase):
def createTestList(self):
return [u'a', u'b', u'c']
class Test_CollectionsFloatList(Test_CollectionsListBase):
def createTestList(self):
return [1.5, 4.5, -0.5]
class Test_CollectionsBoolList(Test_CollectionsListBase):
def createTestList(self):
return [True,False]
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
unittest.main()
pylucene-4.10.1-1/test/test_DocBoost.py 000644 000765 000000 00000005317 12162654000 020110 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import Term
from org.apache.lucene.search import TermQuery
from org.apache.pylucene.search import PythonCollector
from org.apache.lucene.util import Version
class DocBoostTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def testDocBoost(self):
writer = self.getWriter(analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT))
f1 = Field("field", "word", TextField.TYPE_STORED)
f2 = Field("field", "word", TextField.TYPE_STORED)
f2.setBoost(2.0)
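        # An index-time field boost is folded into the field's norm, so the
        # boosted document should come back with the higher score.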
d1 = Document()
d2 = Document()
d1.add(f1) # boost = 1
d2.add(f2) # boost = 2
writer.addDocument(d1)
writer.addDocument(d2)
writer.close()
scores = [0.0] * 2
class collector(PythonCollector):
def __init__(_self, scores):
super(collector, _self).__init__()
_self.scores = scores
_self.base = 0
def collect(_self, doc, score):
_self.scores[doc + _self.base] = score
def setNextReader(_self, context):
_self.base = context.docBase
def acceptsDocsOutOfOrder(_self):
return True
self.getSearcher().search(TermQuery(Term("field", "word")),
collector(scores))
lastScore = 0.0
for score in scores:
self.assert_(score > lastScore)
lastScore = score
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_FilteredQuery.py 000644 000765 000000 00000011327 12162654000 021156 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from java.util import BitSet
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import Term
from org.apache.lucene.search import \
FilteredQuery, Sort, SortField, TermRangeQuery, TermQuery
from org.apache.lucene.util import Bits, DocIdBitSet, Version
from org.apache.pylucene.search import PythonFilter
class FilteredQueryTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def setUp(self):
super(FilteredQueryTestCase, self).setUp()
writer = self.getWriter(analyzer=WhitespaceAnalyzer(Version.LUCENE_CURRENT))
doc = Document()
doc.add(Field("field", "one two three four five", TextField.TYPE_STORED))
doc.add(Field("sorter", "b", TextField.TYPE_STORED))
writer.addDocument(doc)
doc = Document()
doc.add(Field("field", "one two three four", TextField.TYPE_STORED))
doc.add(Field("sorter", "d", TextField.TYPE_STORED))
writer.addDocument(doc)
doc = Document()
doc.add(Field("field", "one two three y", TextField.TYPE_STORED))
doc.add(Field("sorter", "a", TextField.TYPE_STORED))
writer.addDocument(doc)
doc = Document()
doc.add(Field("field", "one two x", TextField.TYPE_STORED))
doc.add(Field("sorter", "c", TextField.TYPE_STORED))
writer.addDocument(doc)
writer.commit()
writer.close()
self.searcher = self.getSearcher()
self.query = TermQuery(Term("field", "three"))
class filter(PythonFilter):
def getDocIdSet(self, context, acceptDocs):
if acceptDocs is None:
acceptDocs = Bits.MatchAllBits(5)
bitset = BitSet(5)
if acceptDocs.get(1):
bitset.set(1)
if acceptDocs.get(3):
bitset.set(3)
return DocIdBitSet(bitset)
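        # The filter above admits at most docs 1 and 3 (subject to
        # acceptDocs), so every query in these tests is effectively
        # intersected with that doc-id set.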
self.filter = filter()
def testFilteredQuery(self):
filteredquery = FilteredQuery(self.query, self.filter)
topDocs = self.searcher.search(filteredquery, 50)
self.assertEqual(1, topDocs.totalHits)
self.assertEqual(1, topDocs.scoreDocs[0].doc)
topDocs = self.searcher.search(filteredquery, None, 50,
Sort(SortField("sorter",
SortField.Type.STRING)))
self.assertEqual(1, topDocs.totalHits)
self.assertEqual(1, topDocs.scoreDocs[0].doc)
filteredquery = FilteredQuery(TermQuery(Term("field", "one")),
self.filter)
topDocs = self.searcher.search(filteredquery, 50)
self.assertEqual(2, topDocs.totalHits)
filteredquery = FilteredQuery(TermQuery(Term("field", "x")),
self.filter)
topDocs = self.searcher.search(filteredquery, 50)
self.assertEqual(1, topDocs.totalHits)
self.assertEqual(3, topDocs.scoreDocs[0].doc)
filteredquery = FilteredQuery(TermQuery(Term("field", "y")),
self.filter)
topDocs = self.searcher.search(filteredquery, 50)
self.assertEqual(0, topDocs.totalHits)
def testRangeQuery(self):
"""
This tests FilteredQuery's rewrite correctness
"""
rq = TermRangeQuery.newStringRange("sorter", "b", "d", True, True)
filteredquery = FilteredQuery(rq, self.filter)
scoreDocs = self.searcher.search(filteredquery, None, 1000).scoreDocs
self.assertEqual(2, len(scoreDocs))
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_FuzzyQuery.py 000644 000765 000000 00000034045 12162654000 020551 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from itertools import izip
from lucene import JavaError
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import MultiReader, Term
from org.apache.lucene.search import FuzzyQuery, MultiTermQuery
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import Version
class FuzzyQueryTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def _addDoc(self, text, writer):
doc = Document()
doc.add(Field("field", text, TextField.TYPE_STORED))
writer.addDocument(doc)
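    # FuzzyQuery(term, maxEdits, prefixLength): maxEdits is the permitted
    # Levenshtein distance (defaultMaxEdits is 2 in Lucene 4.x) and
    # prefixLength is the number of leading characters that must match
    # exactly before any edits are allowed.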
def testDefaultFuzziness(self):
writer = self.getWriter()
self._addDoc("aaaaa", writer)
self._addDoc("aaaab", writer)
self._addDoc("aaabb", writer)
self._addDoc("aabbb", writer)
self._addDoc("abbbb", writer)
self._addDoc("bbbbb", writer)
self._addDoc("ddddd", writer)
writer.commit()
writer.close()
searcher = self.getSearcher()
query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 0)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(3, len(hits))
# same with prefix
query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 1)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(3, len(hits))
query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 2)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(3, len(hits))
query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 3)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(3, len(hits))
query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 4)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(2, len(hits))
query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 5)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
query = FuzzyQuery(Term("field", "aaaaa"), FuzzyQuery.defaultMaxEdits, 6)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
# test scoring
query = FuzzyQuery(Term("field", "bbbbb"), FuzzyQuery.defaultMaxEdits, 0)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(3, len(hits), "3 documents should match")
order = ("bbbbb", "abbbb", "aabbb")
for hit, o in izip(hits, order):
term = searcher.doc(hit.doc).get("field")
self.assertEqual(o, term)
# test pq size by supplying maxExpansions=2
# This query would normally return 3 documents, because 3 terms match
# (see above):
query = FuzzyQuery(Term("field", "bbbbb"), FuzzyQuery.defaultMaxEdits,
0, 2, False)
hits = searcher.search(query, None, 1000).scoreDocs
        self.assertEqual(2, len(hits), "only 2 documents should match")
        order = ("bbbbb", "abbbb")
for hit, o in izip(hits, order):
term = searcher.doc(hit.doc).get("field")
self.assertEqual(o, term)
# not similar enough:
query = FuzzyQuery(Term("field", "xxxxx"))
topDocs = searcher.search(query, 50)
self.assertEqual(0, topDocs.totalHits)
# edit distance to "aaaaa" = 3
query = FuzzyQuery(Term("field", "aaccc"))
topDocs = searcher.search(query, 50)
self.assertEqual(0, topDocs.totalHits)
# query identical to a word in the index:
query = FuzzyQuery(Term("field", "aaaaa"))
scoreDocs = searcher.search(query, 50).scoreDocs
self.assertEqual(3, len(scoreDocs))
self.assertEqual(searcher.doc(scoreDocs[0].doc).get("field"), "aaaaa")
# default allows for up to two edits:
self.assertEqual(searcher.doc(scoreDocs[1].doc).get("field"), "aaaab")
self.assertEqual(searcher.doc(scoreDocs[2].doc).get("field"), "aaabb")
# query similar to a word in the index:
query = FuzzyQuery(Term("field", "aaaac"))
scoreDocs = searcher.search(query, 50).scoreDocs
self.assertEqual(3, len(scoreDocs))
self.assertEqual(searcher.doc(scoreDocs[0].doc).get("field"), "aaaaa")
self.assertEqual(searcher.doc(scoreDocs[1].doc).get("field"), "aaaab")
self.assertEqual(searcher.doc(scoreDocs[2].doc).get("field"), "aaabb")
# now with prefix
query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 1)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(3, len(hits))
self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("aaaaa"))
self.assertEqual(searcher.doc(hits[1].doc).get("field"), ("aaaab"))
self.assertEqual(searcher.doc(hits[2].doc).get("field"), ("aaabb"))
query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 2)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(3, len(hits))
self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("aaaaa"))
self.assertEqual(searcher.doc(hits[1].doc).get("field"), ("aaaab"))
self.assertEqual(searcher.doc(hits[2].doc).get("field"), ("aaabb"))
query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 3)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(3, len(hits))
self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("aaaaa"))
self.assertEqual(searcher.doc(hits[1].doc).get("field"), ("aaaab"))
self.assertEqual(searcher.doc(hits[2].doc).get("field"), ("aaabb"))
query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 4)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(2, len(hits))
self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("aaaaa"))
self.assertEqual(searcher.doc(hits[1].doc).get("field"), ("aaaab"))
query = FuzzyQuery(Term("field", "aaaac"), FuzzyQuery.defaultMaxEdits, 5)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(0, len(hits))
query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 0)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd"))
# now with prefix
query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 1)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd"))
query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 2)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd"))
query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 3)
        hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd"))
query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 4)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
self.assertEqual(searcher.doc(hits[0].doc).get("field"), ("ddddd"))
query = FuzzyQuery(Term("field", "ddddX"), FuzzyQuery.defaultMaxEdits, 5)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(0, len(hits))
# different field = no match:
query = FuzzyQuery(Term("anotherfield", "ddddX"), FuzzyQuery.defaultMaxEdits, 0)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(0, len(hits))
def test2(self):
writer = self.getWriter()
self._addDoc("LANGE", writer)
self._addDoc("LUETH", writer)
self._addDoc("PIRSING", writer)
self._addDoc("RIEGEL", writer)
self._addDoc("TRZECZIAK", writer)
self._addDoc("WALKER", writer)
self._addDoc("WBR", writer)
self._addDoc("WE", writer)
self._addDoc("WEB", writer)
self._addDoc("WEBE", writer)
self._addDoc("WEBER", writer)
self._addDoc("WEBERE", writer)
self._addDoc("WEBREE", writer)
self._addDoc("WEBEREI", writer)
self._addDoc("WBRE", writer)
self._addDoc("WITTKOPF", writer)
self._addDoc("WOJNAROWSKI", writer)
self._addDoc("WRICKE", writer)
reader = writer.getReader()
searcher = self.getSearcher(reader=reader)
writer.close()
query = FuzzyQuery(Term("field", "WEBER"), 2, 1)
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(8, len(hits))
def testTieBreaker(self):
# MultiTermQuery provides (via attribute) information about which values
# must be competitive to enter the priority queue.
#
# FuzzyQuery optimizes itself around this information, if the attribute
# is not implemented correctly, there will be problems!
#
directory = RAMDirectory()
writer = self.getWriter(directory=directory)
self._addDoc("a123456", writer)
self._addDoc("c123456", writer)
self._addDoc("d123456", writer)
self._addDoc("e123456", writer)
directory2 = RAMDirectory()
writer2 = self.getWriter(directory=directory2)
self._addDoc("a123456", writer2)
self._addDoc("b123456", writer2)
self._addDoc("b123456", writer2)
self._addDoc("b123456", writer2)
self._addDoc("c123456", writer2)
self._addDoc("f123456", writer2)
ir1 = writer.getReader()
ir2 = writer2.getReader()
mr = MultiReader([ir1, ir2])
searcher = self.getSearcher(reader=mr)
fq = FuzzyQuery(Term("field", "z123456"), 1, 0, 2, False)
docs = searcher.search(fq, 2)
self.assertEqual(5, docs.totalHits) # 5 docs, from the a and b's
mr.close()
ir1.close()
ir2.close()
writer.close()
writer2.close()
directory.close()
directory2.close()
def testBoostOnlyRewrite(self):
# Test the TopTermsBoostOnlyBooleanQueryRewrite rewrite method.
writer = self.getWriter()
self._addDoc("Lucene", writer)
self._addDoc("Lucene", writer)
self._addDoc("Lucenne", writer)
reader = writer.getReader()
searcher = self.getSearcher(reader=reader)
writer.close()
query = FuzzyQuery(Term("field", "lucene"))
query.setRewriteMethod(MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(50))
hits = searcher.search(query, None, 1000).scoreDocs
self.assertEqual(3, len(hits))
# normally, 'Lucenne' would be the first result as IDF will skew the score.
self.assertEqual("Lucene", reader.document(hits[0].doc).get("field"))
self.assertEqual("Lucene", reader.document(hits[1].doc).get("field"))
self.assertEqual("Lucenne", reader.document(hits[2].doc).get("field"))
def testGiga(self):
w = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
self._addDoc("Lucene in Action", w)
self._addDoc("Lucene for Dummies", w)
self._addDoc("Giga byte", w)
self._addDoc("ManagingGigabytesManagingGigabyte", w)
self._addDoc("ManagingGigabytesManagingGigabytes", w)
self._addDoc("The Art of Computer Science", w)
self._addDoc("J. K. Rowling", w)
self._addDoc("JK Rowling", w)
self._addDoc("Joanne K Roling", w)
self._addDoc("Bruce Willis", w)
self._addDoc("Willis bruce", w)
self._addDoc("Brute willis", w)
self._addDoc("B. willis", w)
r = w.getReader()
w.close()
q = FuzzyQuery(Term("field", "giga"), 0)
searcher = self.getSearcher(reader=r)
hits = searcher.search(q, 10).scoreDocs
self.assertEqual(1, len(hits))
self.assertEqual("Giga byte", searcher.doc(hits[0].doc).get("field"))
def testDistanceAsEditsSearching(self):
w = self.getWriter()
self._addDoc("foobar", w)
self._addDoc("test", w)
self._addDoc("working", w)
reader = w.getReader()
searcher = self.getSearcher(reader=reader)
w.close()
q = FuzzyQuery(Term("field", "fouba"), 2)
hits = searcher.search(q, 10).scoreDocs
self.assertEqual(1, len(hits))
self.assertEqual("foobar", searcher.doc(hits[0].doc).get("field"))
q = FuzzyQuery(Term("field", "foubara"), 2)
hits = searcher.search(q, 10).scoreDocs
self.assertEqual(1, len(hits))
self.assertEqual("foobar", searcher.doc(hits[0].doc).get("field"))
try:
q = FuzzyQuery(Term("field", "t"), 3)
self.fail()
        except JavaError:
            # expected: FuzzyQuery rejects maxEdits greater than 2
            pass
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_Highlighter.py 000644 000765 000000 00000014261 12162654000 020630 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from java.io import StringReader
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search.highlight import \
Highlighter, QueryScorer, SimpleFragmenter
from org.apache.lucene.util import Version
from org.apache.pylucene.search.highlight import PythonFormatter
class TestFormatter(PythonFormatter):
def __init__(self, testCase):
super(TestFormatter, self).__init__()
self.testCase = testCase
def highlightTerm(self, originalText, group):
if group.getTotalScore() <= 0:
            return originalText
self.testCase.countHighlightTerm()
return "" + originalText + ""
class HighlighterTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene.
2004 by Yura Smolsky ;)
"""
FIELD_NAME = "contents"
texts = [ "A wicked problem is one for which each attempt to create a solution changes the understanding of the problem. Wicked problems cannot be solved in a traditional linear fashion, because the problem definition evolves as new possible solutions are considered and/or implemented."
"Wicked problems always occur in a social context -- the wickedness of the problem reflects the diversity among the stakeholders in the problem."
"From http://cognexus.org/id42.htm"
"Most projects in organizations -- and virtually all technology-related projects these days -- are about wicked problems. Indeed, it is the social complexity of these problems, not their technical complexity, that overwhelms most current problem solving and project management approaches."
"This text has a typo in referring to whicked problems" ];
def __init__(self, *args):
super(HighlighterTestCase, self).__init__(*args)
self.parser = QueryParser(Version.LUCENE_CURRENT, self.FIELD_NAME,
StandardAnalyzer(Version.LUCENE_CURRENT))
def setUp(self):
super(HighlighterTestCase, self).setUp()
self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
writer = self.getWriter(analyzer=self.analyzer)
for text in self.texts:
self.addDoc(writer, text)
writer.commit()
writer.close()
self.reader = self.getReader()
        self.numHighlights = 0
def testSimpleHighlighter(self):
self.doSearching("Wicked")
highlighter = Highlighter(QueryScorer(self.query))
highlighter.setTextFragmenter(SimpleFragmenter(40))
maxNumFragmentsRequired = 2
for scoreDoc in self.scoreDocs:
text = self.searcher.doc(scoreDoc.doc).get(self.FIELD_NAME)
tokenStream = self.analyzer.tokenStream(self.FIELD_NAME,
StringReader(text))
result = highlighter.getBestFragments(tokenStream, text,
maxNumFragmentsRequired,
"...")
print "\t", result
# Not sure we can assert anything here - just running to check we don't
# throw any exceptions
def testGetBestFragmentsSimpleQuery(self):
self.doSearching("Wicked")
self.doStandardHighlights()
self.assert_(self.numHighlights == 3,
("Failed to find correct number of highlights, %d found"
%(self.numHighlights)))
def doSearching(self, queryString):
self.searcher = self.getSearcher()
self.query = self.parser.parse(queryString)
# for any multi-term queries to work (prefix, wildcard, range,
# fuzzy etc) you must use a rewritten query!
self.query = self.query.rewrite(self.reader)
print "Searching for:", self.query.toString(self.FIELD_NAME)
self.scoreDocs = self.searcher.search(self.query, 100).scoreDocs
self.numHighlights = 0
def doStandardHighlights(self):
formatter = TestFormatter(self)
highlighter = Highlighter(formatter, QueryScorer(self.query))
highlighter.setTextFragmenter(SimpleFragmenter(20))
for scoreDoc in self.scoreDocs:
text = self.searcher.doc(scoreDoc.doc).get(self.FIELD_NAME)
maxNumFragmentsRequired = 2
fragmentSeparator = "..."
tokenStream = self.analyzer.tokenStream(self.FIELD_NAME,
StringReader(text))
result = highlighter.getBestFragments(tokenStream,
text,
maxNumFragmentsRequired,
fragmentSeparator)
print "\t", result
def countHighlightTerm(self):
self.numHighlights += 1 # update stats used in assertions
def addDoc(self, writer, text):
d = Document()
f = Field(self.FIELD_NAME, text, TextField.TYPE_STORED)
d.add(f)
writer.addDocument(d)
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_ICUFoldingFilter.py 000644 000765 000000 00000006566 12162654000 021474 0 ustar 00vajda wheel 000000 000000 # -*- coding: utf-8 -*-
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Port of java/org/apache/lucene/analysis/icu/ICUFoldingFilter.java
# using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org)
try:
from icu import Normalizer2, UNormalizationMode2
except ImportError, e:
pass
import sys, lucene, unittest
from BaseTokenStreamTestCase import BaseTokenStreamTestCase
from org.apache.lucene.analysis import Analyzer
from org.apache.lucene.util import Version
from org.apache.lucene.analysis.core import WhitespaceTokenizer
from org.apache.pylucene.analysis import PythonAnalyzer
class TestICUFoldingFilter(BaseTokenStreamTestCase):
def testDefaults(self):
from lucene.ICUFoldingFilter import ICUFoldingFilter
class _analyzer(PythonAnalyzer):
def createComponents(_self, fieldName, reader):
source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
return Analyzer.TokenStreamComponents(source, ICUFoldingFilter(source))
a = _analyzer()
# case folding
self._assertAnalyzesTo(a, "This is a test",
[ "this", "is", "a", "test" ])
# case folding
self._assertAnalyzesTo(a, u"Ruß", [ "russ" ])
# case folding with accent removal
self._assertAnalyzesTo(a, u"ΜΆΪΟΣ", [ u"μαιοσ" ])
self._assertAnalyzesTo(a, u"Μάϊος", [ u"μαιοσ" ])
# supplementary case folding
self._assertAnalyzesTo(a, u"𐐖", [ u"𐐾" ])
# normalization
self._assertAnalyzesTo(a, u"ﴳﴺﰧ", [ u"طمطمطم" ])
# removal of default ignorables
self._assertAnalyzesTo(a, u"क्ष", [ u"कष" ])
# removal of latin accents (composed)
self._assertAnalyzesTo(a, u"résumé", [ "resume" ])
# removal of latin accents (decomposed)
self._assertAnalyzesTo(a, u"re\u0301sume\u0301", [ u"resume" ])
# fold native digits
self._assertAnalyzesTo(a, u"৭০৬", [ "706" ])
# ascii-folding-filter type stuff
self._assertAnalyzesTo(a, u"đis is cræzy", [ "dis", "is", "craezy" ])
if __name__ == "__main__":
try:
import icu
except ImportError:
pass
else:
if icu.ICU_VERSION >= '49':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
else:
print >>sys.stderr, "ICU version >= 49 is required, running:", icu.ICU_VERSION
pylucene-4.10.1-1/test/test_ICUNormalizer2Filter.py 000644 000765 000000 00000006524 12162654000 022310 0 ustar 00vajda wheel 000000 000000 # -*- coding: utf-8 -*-
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Port of java/org/apache/lucene/analysis/icu/ICUNormalizer2Filter.java
# using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org)
try:
from icu import Normalizer2, UNormalizationMode2
except ImportError, e:
pass
import sys, lucene, unittest
from BaseTokenStreamTestCase import BaseTokenStreamTestCase
from org.apache.lucene.analysis import Analyzer
from org.apache.lucene.analysis.core import WhitespaceTokenizer
from org.apache.lucene.util import Version
from org.apache.pylucene.analysis import PythonAnalyzer
class TestICUNormalizer2Filter(BaseTokenStreamTestCase):
def testDefaults(self):
from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter
class _analyzer(PythonAnalyzer):
def createComponents(_self, fieldName, reader):
source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
return Analyzer.TokenStreamComponents(source, ICUNormalizer2Filter(source))
a = _analyzer()
# case folding
self._assertAnalyzesTo(a, "This is a test",
[ "this", "is", "a", "test" ])
# case folding
self._assertAnalyzesTo(a, "Ruß", [ "russ" ])
# case folding
self._assertAnalyzesTo(a, u"ΜΆΪΟΣ", [ u"μάϊοσ" ])
self._assertAnalyzesTo(a, u"Μάϊος", [ u"μάϊοσ" ])
# supplementary case folding
self._assertAnalyzesTo(a, u"𐐖", [ u"𐐾" ])
# normalization
self._assertAnalyzesTo(a, u"ﴳﴺﰧ", [ u"طمطمطم" ])
# removal of default ignorables
self._assertAnalyzesTo(a, u"क्ष", [ u"क्ष" ])
def testAlternate(self):
from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter
        class analyzer(PythonAnalyzer):
            # specify nfc with decompose to get nfd
            def createComponents(_self, fieldName, reader):
                source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
                return Analyzer.TokenStreamComponents(
                    source,
                    ICUNormalizer2Filter(
                        source, Normalizer2.getInstance(
                            None, "nfc", UNormalizationMode2.DECOMPOSE)))
a = analyzer()
# decompose EAcute into E + combining Acute
self._assertAnalyzesTo(a, u"\u00E9", [ u"\u0065\u0301" ])
if __name__ == "__main__":
try:
import icu
except ImportError:
pass
else:
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_ICUTransformFilter.py 000644 000765 000000 00000007141 12162654000 022053 0 ustar 00vajda wheel 000000 000000 # -*- coding: utf-8 -*-
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Port of java/org/apache/lucene/analysis/icu/ICUTransformFilter.java
# using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org)
try:
from icu import Transliterator, UTransDirection
except ImportError, e:
pass
import sys, lucene, unittest
from BaseTokenStreamTestCase import BaseTokenStreamTestCase
from java.io import StringReader
from org.apache.lucene.util import Version
from org.apache.lucene.analysis.core import KeywordTokenizer
from org.apache.pylucene.analysis import PythonTokenFilter
class TestICUTransformFilter(BaseTokenStreamTestCase):
def _checkToken(self, transform, input, expected):
from lucene.ICUTransformFilter import ICUTransformFilter
ts = ICUTransformFilter(KeywordTokenizer(StringReader(input)),
transform)
self._assertTokenStreamContents(ts, [ expected ])
def _getTransliterator(self, name):
return Transliterator.createInstance(name, UTransDirection.FORWARD)
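    # The Transliterator instances come from PyICU; ICUTransformFilter
    # applies the transform to each token produced by the wrapped tokenizer.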
def testBasicFunctionality(self):
self._checkToken(self._getTransliterator("Traditional-Simplified"),
u"簡化字", u"简化字")
self._checkToken(self._getTransliterator("Katakana-Hiragana"),
u"ヒラガナ", u"ひらがな")
self._checkToken(self._getTransliterator("Fullwidth-Halfwidth"),
u"アルアノリウ", u"アルアノリウ")
self._checkToken(self._getTransliterator("Any-Latin"),
u"Αλφαβητικός Κατάλογος", u"Alphabētikós Katálogos")
self._checkToken(self._getTransliterator("NFD; [:Nonspacing Mark:] Remove"),
u"Alphabētikós Katálogos", u"Alphabetikos Katalogos")
self._checkToken(self._getTransliterator("Han-Latin"),
u"中国", u"zhōng guó")
def testCustomFunctionality(self):
# convert a's to b's and b's to c's
rules = "a > b; b > c;"
self._checkToken(Transliterator.createFromRules("test", rules, UTransDirection.FORWARD), "abacadaba", "bcbcbdbcb")
def testCustomFunctionality2(self):
        # with left context: an a preceded by c becomes b, otherwise a becomes d
rules = "c { a > b; a > d;"
self._checkToken(Transliterator.createFromRules("test", rules, UTransDirection.FORWARD), "caa", "cbd")
def testOptimizer2(self):
self._checkToken(self._getTransliterator("Traditional-Simplified; Lower"),
"ABCDE", "abcde")
if __name__ == "__main__":
try:
import icu
except ImportError:
pass
else:
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_IndexDeletionPolicy.py 000644 000765 000000 00000005667 12203701624 022320 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.document import Document
from org.apache.lucene.index import DirectoryReader, IndexWriterConfig
from org.apache.pylucene.index import PythonIndexDeletionPolicy
class MyDeletionPolicy(PythonIndexDeletionPolicy):
onInitCalled = False
onCommitCalled = False
def onInit(self, commits):
self.onInitCalled = True
def onCommit(self, commits):
self.onCommitCalled = True
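# note: neither callback deletes a commit point, so every commit survives;
# the listCommits() counts asserted below depend on that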
class IndexDeletionPolicyTestCase(PyLuceneTestCase):
def getConfig(self, analyzer):
self.policy = MyDeletionPolicy()
config = IndexWriterConfig(self.TEST_VERSION, analyzer)
config.setIndexDeletionPolicy(self.policy)
return config
def testIndexDeletionPolicy(self):
writer = self.getWriter()
# no commits exist in the index yet
self.assertTrue(self.policy.onInitCalled)
# we haven't called commit yet
self.assertFalse(self.policy.onCommitCalled)
doc = Document()
writer.addDocument(doc)
writer.commit()
# now we called commit
self.assertTrue(self.policy.onCommitCalled)
# external IR sees 1 commit:
self.assertEquals(1, DirectoryReader.listCommits(self.directory).size())
# commit again:
writer.addDocument(doc)
writer.commit()
# external IR sees 2 commits:
self.assertEquals(2, DirectoryReader.listCommits(self.directory).size())
writer.close()
# open same index, make sure both commits survived:
writer = self.getWriter()
self.assertTrue(self.policy.onInitCalled)
self.assertFalse(self.policy.onCommitCalled)
self.assertEquals(2, DirectoryReader.listCommits(self.directory).size())
writer.close()
# a third commit comes from closing the writer again
self.assertEquals(3, DirectoryReader.listCommits(self.directory).size())
if __name__ == "__main__":
lucene.initVM()
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_Not.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.util import Version
class NotTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def testNot(self):
writer = self.getWriter(analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT))
d1 = Document()
d1.add(Field("field", "a b", TextField.TYPE_STORED))
writer.addDocument(d1)
writer.commit()
writer.close()
searcher = self.getSearcher()
query = QueryParser(Version.LUCENE_CURRENT, "field",
SimpleAnalyzer(Version.LUCENE_CURRENT)).parse("a NOT b")
topDocs = searcher.search(query, 50)
self.assertEqual(0, topDocs.totalHits)
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_PerFieldAnalyzerWrapper.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from java.io import StringReader
from java.util import HashMap
from org.apache.lucene.analysis.core import SimpleAnalyzer, WhitespaceAnalyzer
from org.apache.lucene.analysis.miscellaneous import PerFieldAnalyzerWrapper
from org.apache.lucene.analysis.tokenattributes import CharTermAttribute
from org.apache.lucene.util import Version
class PerFieldAnalyzerTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def testPerField(self):
perField = HashMap()
perField.put("special", SimpleAnalyzer(Version.LUCENE_CURRENT))
analyzer = PerFieldAnalyzerWrapper(WhitespaceAnalyzer(Version.LUCENE_CURRENT), perField)
text = "Qwerty"
tokenStream = analyzer.tokenStream("field", StringReader(text))
tokenStream.reset()
termAtt = tokenStream.getAttribute(CharTermAttribute.class_)
self.assert_(tokenStream.incrementToken())
self.assertEqual("Qwerty", termAtt.toString(),
"WhitespaceAnalyzer does not lowercase")
tokenStream = analyzer.tokenStream("special", StringReader(text))
tokenStream.reset()
termAtt = tokenStream.getAttribute(CharTermAttribute.class_)
self.assert_(tokenStream.incrementToken())
self.assertEqual("qwerty", termAtt.toString(),
"SimpleAnalyzer lowercases")
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_PhraseQuery.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis import Analyzer
from org.apache.lucene.analysis.core import LowerCaseTokenizer, StopAnalyzer
from org.apache.lucene.analysis.tokenattributes import CharTermAttribute
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import Term
from org.apache.lucene.search import \
BooleanClause, BooleanQuery, PhraseQuery, TermQuery
from org.apache.lucene.util import Version
from org.apache.pylucene.analysis import \
PythonAnalyzer, PythonFilteringTokenFilter
class PhraseQueryTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def setUp(self):
super(PhraseQueryTestCase, self).setUp()
doc = Document()
doc.add(Field("field", "one two three four five", TextField.TYPE_STORED))
writer = self.getWriter()
writer.addDocument(doc)
writer.close()
self.searcher = self.getSearcher()
self.query = PhraseQuery()
def testNotCloseEnough(self):
self.query.setSlop(2)
self.query.add(Term("field", "one"))
self.query.add(Term("field", "five"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(0, topDocs.totalHits)
def testBarelyCloseEnough(self):
self.query.setSlop(3)
self.query.add(Term("field", "one"))
self.query.add(Term("field", "five"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(1, topDocs.totalHits)
def testExact(self):
"""
Ensures slop of 0 works for exact matches, but not reversed
"""
# slop is zero by default
self.query.add(Term("field", "four"))
self.query.add(Term("field", "five"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(1, topDocs.totalHits, "exact match")
self.query = PhraseQuery()
self.query.add(Term("field", "two"))
self.query.add(Term("field", "one"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(0, topDocs.totalHits, "reverse not exact")
def testSlop1(self):
# Ensures slop of 1 works with terms in order.
self.query.setSlop(1)
self.query.add(Term("field", "one"))
self.query.add(Term("field", "two"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(1, topDocs.totalHits, "in order")
# Ensures slop of 1 does not work for phrases out of order;
# it must be at least 2.
self.query = PhraseQuery()
self.query.setSlop(1)
self.query.add(Term("field", "two"))
self.query.add(Term("field", "one"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(0, topDocs.totalHits, "reversed, slop not 2 or more")
def testOrderDoesntMatter(self):
"""
As long as slop is at least 2, terms can be reversed
"""
self.query.setSlop(2) # must be at least two for reverse order match
self.query.add(Term("field", "two"))
self.query.add(Term("field", "one"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(1, topDocs.totalHits, "just sloppy enough")
self.query = PhraseQuery()
self.query.setSlop(2)
self.query.add(Term("field", "three"))
self.query.add(Term("field", "one"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(0, topDocs.totalHits, "not sloppy enough")
def testMultipleTerms(self):
"""
slop is the total number of positional moves allowed
to line up a phrase
"""
self.query.setSlop(2)
self.query.add(Term("field", "one"))
self.query.add(Term("field", "three"))
self.query.add(Term("field", "five"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(1, topDocs.totalHits, "two total moves")
self.query = PhraseQuery()
self.query.setSlop(5) # it takes six moves to match this phrase
self.query.add(Term("field", "five"))
self.query.add(Term("field", "three"))
self.query.add(Term("field", "one"))
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(0, topDocs.totalHits, "slop of 5 not close enough")
self.query.setSlop(6)
topDocs = self.searcher.search(self.query, 50)
self.assertEqual(1, topDocs.totalHits, "slop of 6 just right")
def testPhraseQueryWithStopAnalyzer(self):
writer = self.getWriter(analyzer=StopAnalyzer(Version.LUCENE_CURRENT))
doc = Document()
doc.add(Field("field", "the stop words are here", TextField.TYPE_STORED))
writer.addDocument(doc)
writer.close()
searcher = self.getSearcher()
# valid exact phrase query
query = PhraseQuery()
query.add(Term("field", "stop"))
query.add(Term("field", "words"))
scoreDocs = searcher.search(query, None, 50).scoreDocs
self.assertEqual(1, len(scoreDocs))
def testPhraseQueryInConjunctionScorer(self):
writer = self.getWriter()
doc = Document()
doc.add(Field("source", "marketing info", TextField.TYPE_STORED))
writer.addDocument(doc)
doc = Document()
doc.add(Field("contents", "foobar", TextField.TYPE_STORED))
doc.add(Field("source", "marketing info", TextField.TYPE_STORED))
writer.addDocument(doc)
writer.close()
searcher = self.getSearcher()
phraseQuery = PhraseQuery()
phraseQuery.add(Term("source", "marketing"))
phraseQuery.add(Term("source", "info"))
topDocs = searcher.search(phraseQuery, 50)
self.assertEqual(2, topDocs.totalHits)
termQuery = TermQuery(Term("contents","foobar"))
booleanQuery = BooleanQuery()
booleanQuery.add(termQuery, BooleanClause.Occur.MUST)
booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST)
topDocs = searcher.search(booleanQuery, 50)
self.assertEqual(1, topDocs.totalHits)
writer = self.getWriter()
doc = Document()
doc.add(Field("contents", "map entry woo", TextField.TYPE_STORED))
writer.addDocument(doc)
doc = Document()
doc.add(Field("contents", "woo map entry", TextField.TYPE_STORED))
writer.addDocument(doc)
doc = Document()
doc.add(Field("contents", "map foobarword entry woo", TextField.TYPE_STORED))
writer.addDocument(doc)
writer.close()
searcher = self.getSearcher()
termQuery = TermQuery(Term("contents", "woo"))
phraseQuery = PhraseQuery()
phraseQuery.add(Term("contents", "map"))
phraseQuery.add(Term("contents", "entry"))
topDocs = searcher.search(termQuery, 50)
self.assertEqual(3, topDocs.totalHits)
topDocs = searcher.search(phraseQuery, 50)
self.assertEqual(2, topDocs.totalHits)
booleanQuery = BooleanQuery()
booleanQuery.add(termQuery, BooleanClause.Occur.MUST)
booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST)
topDocs = searcher.search(booleanQuery, 50)
self.assertEqual(2, topDocs.totalHits)
booleanQuery = BooleanQuery()
booleanQuery.add(phraseQuery, BooleanClause.Occur.MUST)
booleanQuery.add(termQuery, BooleanClause.Occur.MUST)
topDocs = searcher.search(booleanQuery, 50)
self.assertEqual(2, topDocs.totalHits)
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_PositionIncrement.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from lucene import JArray
from PyLuceneTestCase import PyLuceneTestCase
from MultiSpansWrapper import MultiSpansWrapper
from java.io import StringReader
from org.apache.lucene.analysis import Analyzer
from org.apache.lucene.analysis.core import \
LowerCaseTokenizer, WhitespaceTokenizer
from org.apache.lucene.analysis.tokenattributes import \
CharTermAttribute, OffsetAttribute, PayloadAttribute, \
PositionIncrementAttribute
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import MultiFields, Term
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import MultiPhraseQuery, PhraseQuery
from org.apache.lucene.search.payloads import PayloadSpanUtil
from org.apache.lucene.search.spans import SpanNearQuery, SpanTermQuery
from org.apache.lucene.util import BytesRef, Version
from org.apache.pylucene.analysis import \
PythonAnalyzer, PythonFilteringTokenFilter, PythonTokenFilter, \
PythonTokenizer
class PositionIncrementTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def testSetPosition(self):
class _tokenizer(PythonTokenizer):
def __init__(_self, reader):
super(_tokenizer, _self).__init__(reader)
_self.TOKENS = ["1", "2", "3", "4", "5"]
_self.INCREMENTS = [1, 2, 1, 0, 1]
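# these increments put the tokens at positions 0, 2, 3, 3, 4
# ("4" stacks on the same position as "3")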
_self.i = 0
_self.posIncrAtt = _self.addAttribute(PositionIncrementAttribute.class_)
_self.termAtt = _self.addAttribute(CharTermAttribute.class_)
_self.offsetAtt = _self.addAttribute(OffsetAttribute.class_)
def incrementToken(_self):
if _self.i == len(_self.TOKENS):
return False
_self.clearAttributes()
_self.termAtt.append(_self.TOKENS[_self.i])
_self.offsetAtt.setOffset(_self.i, _self.i)
_self.posIncrAtt.setPositionIncrement(_self.INCREMENTS[_self.i])
_self.i += 1
return True
def end(_self):
pass
def reset(_self):
pass
def close(_self):
pass
class _analyzer(PythonAnalyzer):
def createComponents(_self, fieldName, reader):
return Analyzer.TokenStreamComponents(_tokenizer(reader))
writer = self.getWriter(analyzer=_analyzer())
d = Document()
d.add(Field("field", "bogus", TextField.TYPE_STORED))
writer.addDocument(d)
writer.commit()
writer.close()
searcher = self.getSearcher()
reader = searcher.getIndexReader()
pos = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "field", BytesRef("1"))
pos.nextDoc()
# first token should be at position 0
self.assertEqual(0, pos.nextPosition())
pos = MultiFields.getTermPositionsEnum(reader, MultiFields.getLiveDocs(reader), "field", BytesRef("2"))
pos.nextDoc()
# second token should be at position 2
self.assertEqual(2, pos.nextPosition())
q = PhraseQuery()
q.add(Term("field", "1"))
q.add(Term("field", "2"))
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(0, len(hits))
# same as previous, just specify positions explicitly.
q = PhraseQuery()
q.add(Term("field", "1"), 0)
q.add(Term("field", "2"), 1)
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(0, len(hits))
# specifying correct positions should find the phrase.
q = PhraseQuery()
q.add(Term("field", "1"), 0)
q.add(Term("field", "2"), 2)
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
q = PhraseQuery()
q.add(Term("field", "2"))
q.add(Term("field", "3"))
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
q = PhraseQuery()
q.add(Term("field", "3"))
q.add(Term("field", "4"))
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(0, len(hits))
# phrase query would find it when correct positions are specified.
q = PhraseQuery()
q.add(Term("field", "3"), 0)
q.add(Term("field", "4"), 0)
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
# phrase query should fail for a non-existing searched term
# even if other searched terms exist in the same searched position.
q = PhraseQuery()
q.add(Term("field", "3"), 0)
q.add(Term("field", "9"), 0)
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(0, len(hits))
# multi-phrase query should succeed for a non-existing searched term
# because another searched term exists in the same searched position.
mq = MultiPhraseQuery()
mq.add([Term("field", "3"), Term("field", "9")], 0)
hits = searcher.search(mq, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
q = PhraseQuery()
q.add(Term("field", "2"))
q.add(Term("field", "4"))
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
q = PhraseQuery()
q.add(Term("field", "3"))
q.add(Term("field", "5"))
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
q = PhraseQuery()
q.add(Term("field", "4"))
q.add(Term("field", "5"))
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(1, len(hits))
q = PhraseQuery()
q.add(Term("field", "2"))
q.add(Term("field", "5"))
hits = searcher.search(q, None, 1000).scoreDocs
self.assertEqual(0, len(hits))
def testPayloadsPos0(self):
writer = self.getWriter(analyzer=TestPayloadAnalyzer())
doc = Document()
doc.add(Field("content", "a a b c d e a f g h i j a b k k",
TextField.TYPE_STORED))
writer.addDocument(doc)
reader = writer.getReader()
writer.close()
tp = MultiFields.getTermPositionsEnum(reader,
MultiFields.getLiveDocs(reader),
"content", BytesRef("a"))
count = 0
self.assert_(tp.nextDoc() != tp.NO_MORE_DOCS)
# "a" occurs 4 times
self.assertEqual(4, tp.freq())
expected = 0
self.assertEqual(expected, tp.nextPosition())
self.assertEqual(1, tp.nextPosition())
self.assertEqual(3, tp.nextPosition())
self.assertEqual(6, tp.nextPosition())
# only one doc has "a"
self.assert_(tp.nextDoc() == tp.NO_MORE_DOCS)
searcher = self.getSearcher(reader=reader)
stq1 = SpanTermQuery(Term("content", "a"))
stq2 = SpanTermQuery(Term("content", "k"))
sqs = [stq1, stq2]
snq = SpanNearQuery(sqs, 30, False)
count = 0
sawZero = False
pspans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq)
while pspans.next():
payloads = pspans.getPayload()
sawZero |= pspans.start() == 0
it = payloads.iterator()
while it.hasNext():
count += 1
it.next()
self.assertEqual(5, count)
self.assert_(sawZero)
spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), snq)
count = 0
sawZero = False
while spans.next():
count += 1
sawZero |= spans.start() == 0
self.assertEqual(4, count)
self.assert_(sawZero)
sawZero = False
psu = PayloadSpanUtil(searcher.getTopReaderContext())
pls = psu.getPayloadsForQuery(snq)
count = pls.size()
it = pls.iterator()
while it.hasNext():
bytes = JArray('byte').cast_(it.next())
s = bytes.string_
sawZero |= s == "pos: 0"
self.assertEqual(5, count)
self.assert_(sawZero)
class StopWhitespaceAnalyzer(PythonAnalyzer):
def __init__(self, enablePositionIncrements):
super(StopWhitespaceAnalyzer, self).__init__()
self.enablePositionIncrements = enablePositionIncrements
def createComponents(self, fieldName, reader):
class _stopFilter(PythonFilteringTokenFilter):
def __init__(_self, tokenStream):
super(_stopFilter, _self).__init__(Version.LUCENE_CURRENT, tokenStream)
_self.termAtt = _self.addAttribute(CharTermAttribute.class_);
def accept(_self):
return _self.termAtt.toString() != "stop"
source = WhitespaceTokenizer(Version.LUCENE_CURRENT, reader)
return Analyzer.TokenStreamComponents(source, _stopFilter(source))
class TestPayloadAnalyzer(PythonAnalyzer):
def createComponents(self, fieldName, reader):
source = LowerCaseTokenizer(Version.LUCENE_CURRENT, reader)
return Analyzer.TokenStreamComponents(source, PayloadFilter(source, fieldName))
class PayloadFilter(PythonTokenFilter):
def __init__(self, input, fieldName):
super(PayloadFilter, self).__init__(input)
self.input = input
self.fieldName = fieldName
self.pos = 0
self.i = 0
self.posIncrAttr = input.addAttribute(PositionIncrementAttribute.class_)
self.payloadAttr = input.addAttribute(PayloadAttribute.class_)
self.termAttr = input.addAttribute(CharTermAttribute.class_)
def incrementToken(self):
if self.input.incrementToken():
bytes = JArray('byte')("pos: %d" %(self.pos))
self.payloadAttr.setPayload(BytesRef(bytes))
if self.pos == 0 or self.i % 2 == 1:
posIncr = 1
else:
posIncr = 0
self.posIncrAttr.setPositionIncrement(posIncr)
self.pos += posIncr
self.i += 1
return True
return False
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_PrefixFilter.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.document import Document, Field, StringField
from org.apache.lucene.index import Term
from org.apache.lucene.search import ConstantScoreQuery, PrefixFilter
class PrefixFilterTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def testPrefixFilter(self):
writer = self.getWriter()
categories = ["/Computers/Linux",
"/Computers/Mac/One",
"/Computers/Mac/Two",
"/Computers/Windows"]
for category in categories:
doc = Document()
doc.add(Field("category", category, StringField.TYPE_STORED))
writer.addDocument(doc)
writer.close()
# PrefixFilter combined with ConstantScoreQuery
filter = PrefixFilter(Term("category", "/Computers"))
query = ConstantScoreQuery(filter)
searcher = self.getSearcher()
topDocs = searcher.search(query, 50)
self.assertEqual(4, topDocs.totalHits,
"All documents in /Computers category and below")
# test middle of values
filter = PrefixFilter(Term("category", "/Computers/Mac"))
query = ConstantScoreQuery(filter)
topDocs = searcher.search(query, 50)
self.assertEqual(2, topDocs.totalHits, "Two in /Computers/Mac")
# test start of values
filter = PrefixFilter(Term("category", "/Computers/Linux"))
query = ConstantScoreQuery(filter)
topDocs = searcher.search(query, 50)
self.assertEqual(1, topDocs.totalHits, "One in /Computers/Linux")
# test end of values
filter = PrefixFilter(Term("category", "/Computers/Windows"))
query = ConstantScoreQuery(filter)
topDocs = searcher.search(query, 50)
self.assertEqual(1, topDocs.totalHits, "One in /Computers/Windows")
# test non-existent
filter = PrefixFilter(Term("category", "/Computers/ObsoleteOS"))
query = ConstantScoreQuery(filter)
topDocs = searcher.search(query, 50)
self.assertEqual(0, topDocs.totalHits, "no documents")
# test non-existent, before values
filter = PrefixFilter(Term("category", "/Computers/AAA"))
query = ConstantScoreQuery(filter)
topDocs = searcher.search(query, 50)
self.assertEqual(0, topDocs.totalHits, "no documents")
# test non-existent, after values
filter = PrefixFilter(Term("category", "/Computers/ZZZ"))
query = ConstantScoreQuery(filter)
topDocs = searcher.search(query, 50)
self.assertEqual(0, topDocs.totalHits, "no documents")
# test zero-length prefix
filter = PrefixFilter(Term("category", ""))
query = ConstantScoreQuery(filter)
topDocs = searcher.search(query, 50)
self.assertEqual(4, topDocs.totalHits, "all documents")
# test non-existent field
filter = PrefixFilter(Term("nonexistantfield", "/Computers"))
query = ConstantScoreQuery(filter)
topDocs = searcher.search(query, 50)
self.assertEqual(0, topDocs.totalHits, "no documents")
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_PrefixQuery.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.document import Document, Field, StringField
from org.apache.lucene.index import Term
from org.apache.lucene.search import PrefixQuery
class PrefixQueryTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def testPrefixQuery(self):
writer = self.getWriter()
categories = ["/Computers", "/Computers/Mac", "/Computers/Windows"]
for category in categories:
doc = Document()
doc.add(Field("category", category, StringField.TYPE_STORED))
writer.addDocument(doc)
writer.close()
query = PrefixQuery(Term("category", "/Computers"))
searcher = self.getSearcher()
topDocs = searcher.search(query, 50)
self.assertEqual(3, topDocs.totalHits,
"All documents in /Computers category and below")
query = PrefixQuery(Term("category", "/Computers/Mac"))
topDocs = searcher.search(query, 50)
self.assertEqual(1, topDocs.totalHits, "One in /Computers/Mac")
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_PyLucene.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
import os, shutil
from java.io import File, StringReader
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import \
Document, Field, StoredField, StringField, TextField
from org.apache.lucene.index import \
IndexWriter, IndexWriterConfig, DirectoryReader, MultiFields, Term
from org.apache.lucene.queryparser.classic import \
MultiFieldQueryParser, QueryParser
from org.apache.lucene.search import BooleanClause, IndexSearcher, TermQuery
from org.apache.lucene.store import MMapDirectory, SimpleFSDirectory
from org.apache.lucene.util import BytesRefIterator, Version
class Test_PyLuceneBase(object):
def getAnalyzer(self):
return StandardAnalyzer(Version.LUCENE_CURRENT)
def openStore(self):
raise NotImplementedError
def closeStore(self, store, *args):
pass
def getWriter(self, store, analyzer=None, create=False):
if analyzer is None:
analyzer = WhitespaceAnalyzer(Version.LUCENE_CURRENT)
analyzer = LimitTokenCountAnalyzer(analyzer, 10000)
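# LimitTokenCountAnalyzer caps each indexed field at its first 10000
# tokens, the replacement for the old IndexWriter maxFieldLength setting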
config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
if create:
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
writer = IndexWriter(store, config)
return writer
def getReader(self, store, analyzer):
pass
def getSearcher(self, store):
return IndexSearcher(DirectoryReader.open(store))
def test_indexDocument(self):
store = self.openStore()
writer = None
try:
analyzer = self.getAnalyzer()
writer = self.getWriter(store, analyzer, True)
doc = Document()
doc.add(Field("title", "value of testing",
TextField.TYPE_STORED))
doc.add(Field("docid", str(1),
StringField.TYPE_NOT_STORED))
doc.add(Field("owner", "unittester",
StringField.TYPE_STORED))
doc.add(Field("search_name", "wisdom",
StoredField.TYPE))
doc.add(Field("meta_words", "rabbits are beautiful",
TextField.TYPE_NOT_STORED))
writer.addDocument(doc)
finally:
self.closeStore(store, writer)
def test_indexDocumentWithText(self):
store = self.openStore()
writer = None
try:
analyzer = self.getAnalyzer()
writer = self.getWriter(store, analyzer, True)
doc = Document()
doc.add(Field("title", "value of testing",
TextField.TYPE_STORED))
doc.add(Field("docid", str(1),
StringField.TYPE_NOT_STORED))
doc.add(Field("owner", "unittester",
StringField.TYPE_STORED))
doc.add(Field("search_name", "wisdom",
StoredField.TYPE))
doc.add(Field("meta_words", "rabbits are beautiful",
TextField.TYPE_NOT_STORED))
body_text = "hello world" * 20
body_reader = StringReader(body_text)
doc.add(Field("content", body_reader))
writer.addDocument(doc)
finally:
self.closeStore(store, writer)
def test_indexDocumentWithUnicodeText(self):
store = self.openStore()
writer = None
try:
analyzer = self.getAnalyzer()
writer = self.getWriter(store, analyzer, True)
doc = Document()
doc.add(Field("title", "value of testing",
TextField.TYPE_STORED))
doc.add(Field("docid", str(1),
StringField.TYPE_NOT_STORED))
doc.add(Field("owner", "unittester",
StringField.TYPE_STORED))
doc.add(Field("search_name", "wisdom",
StoredField.TYPE))
doc.add(Field("meta_words", "rabbits are beautiful",
TextField.TYPE_NOT_STORED))
# using a unicode body causes problems, which seems very odd
# since the python type is the same regardless after doing
# the encode
body_text = u"hello world" * 20
body_reader = StringReader(body_text)
doc.add(Field("content", body_reader))
writer.addDocument(doc)
finally:
self.closeStore(store, writer)
def test_searchDocuments(self):
self.test_indexDocument()
store = self.openStore()
searcher = None
try:
searcher = self.getSearcher(store)
query = QueryParser(Version.LUCENE_CURRENT, "title",
self.getAnalyzer()).parse("value")
topDocs = searcher.search(query, 50)
self.assertEqual(topDocs.totalHits, 1)
finally:
self.closeStore(store)
def test_searchDocumentsWithMultiField(self):
"""
Tests searching with MultiFieldQueryParser
"""
self.test_indexDocument()
store = self.openStore()
searcher = None
try:
searcher = self.getSearcher(store)
SHOULD = BooleanClause.Occur.SHOULD
query = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT,
"value", ["title", "docid"],
[SHOULD, SHOULD],
self.getAnalyzer())
topDocs = searcher.search(query, 50)
self.assertEquals(1, topDocs.totalHits)
finally:
self.closeStore(store)
def test_removeDocument(self):
self.test_indexDocument()
store = self.openStore()
searcher = None
writer = None
try:
searcher = self.getSearcher(store)
query = TermQuery(Term("docid", str(1)))
topDocs = searcher.search(query, 50)
self.assertEqual(topDocs.totalHits, 1)
# be careful with doc ids: they are ephemeral
docid = topDocs.scoreDocs[0].doc
writer = self.getWriter(store)
writer.deleteDocuments(Term("docid", str(1)))
finally:
self.closeStore(store, writer)
store = self.openStore()
searcher = None
try:
searcher = self.getSearcher(store)
query = TermQuery(Term("docid", str(1)))
topDocs = searcher.search(query, 50)
self.assertEqual(topDocs.totalHits, 0)
finally:
self.closeStore(store)
def test_removeDocuments(self):
self.test_indexDocument()
store = self.openStore()
writer = None
try:
writer = self.getWriter(store)
writer.deleteDocuments(Term('docid', str(1)))
finally:
self.closeStore(store, writer)
store = self.openStore()
searcher = None
try:
searcher = self.getSearcher(store)
query = QueryParser(Version.LUCENE_CURRENT, "title",
self.getAnalyzer()).parse("value")
topDocs = searcher.search(query, 50)
self.assertEqual(topDocs.totalHits, 0)
finally:
self.closeStore(store)
def test_FieldEnumeration(self):
self.test_indexDocument()
store = self.openStore()
writer = None
try:
analyzer = self.getAnalyzer()
writer = self.getWriter(store, analyzer, False)
doc = Document()
doc.add(Field("title", "value of testing",
TextField.TYPE_STORED))
doc.add(Field("docid", str(2),
StringField.TYPE_NOT_STORED))
doc.add(Field("owner", "unittester",
StringField.TYPE_STORED))
doc.add(Field("search_name", "wisdom",
StoredField.TYPE))
doc.add(Field("meta_words", "rabbits are beautiful",
TextField.TYPE_NOT_STORED))
writer.addDocument(doc)
doc = Document()
doc.add(Field("owner", "unittester",
StringField.TYPE_NOT_STORED))
doc.add(Field("search_name", "wisdom",
StoredField.TYPE))
doc.add(Field("meta_words", "rabbits are beautiful",
TextField.TYPE_NOT_STORED))
writer.addDocument(doc)
finally:
self.closeStore(store, writer)
store = self.openStore()
reader = None
try:
reader = DirectoryReader.open(store)
term_enum = MultiFields.getTerms(reader, "docid").iterator(None)
docids = [term.utf8ToString()
for term in BytesRefIterator.cast_(term_enum)]
self.assertEqual(len(docids), 2)
finally:
self.closeStore(store, reader)
def test_getFieldInfos(self):
self.test_indexDocument()
store = self.openStore()
reader = None
try:
reader = DirectoryReader.open(store)
fieldInfos = MultiFields.getMergedFieldInfos(reader)
for fieldInfo in fieldInfos.iterator():
self.assert_(fieldInfo.name in ['owner', 'search_name',
'meta_words', 'docid', 'title'])
if fieldInfo.isIndexed():
self.assert_(fieldInfo.name in ['owner', 'meta_words',
'docid', 'title'])
if fieldInfo.isIndexed() and not fieldInfo.hasVectors():
self.assert_(fieldInfo.name in ['owner', 'meta_words',
'docid', 'title'])
finally:
store = self.closeStore(store, reader)
class Test_PyLuceneWithFSStore(unittest.TestCase, Test_PyLuceneBase):
STORE_DIR = "testrepo"
def setUp(self):
if not os.path.exists(self.STORE_DIR):
os.mkdir(self.STORE_DIR)
def tearDown(self):
if os.path.exists(self.STORE_DIR):
shutil.rmtree(self.STORE_DIR)
def openStore(self):
return SimpleFSDirectory(File(self.STORE_DIR))
def closeStore(self, store, *args):
for arg in args:
if arg is not None:
arg.close()
store.close()
class Test_PyLuceneWithMMapStore(Test_PyLuceneWithFSStore):
def openStore(self):
return MMapDirectory(File(self.STORE_DIR))
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_PyLuceneThread.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
import time, threading
from lucene import getVMEnv
from PyLuceneTestCase import PyLuceneTestCase
from java.lang import Thread
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import Term
from org.apache.lucene.search import PhraseQuery, TermQuery
from org.apache.lucene.util import Version
class PyLuceneThreadTestCase(PyLuceneTestCase):
"""
Test using threads in PyLucene with python threads
"""
def setUp(self):
super(PyLuceneThreadTestCase, self).setUp()
self.classLoader = Thread.currentThread().getContextClassLoader()
writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
doc1 = Document()
doc2 = Document()
doc3 = Document()
doc4 = Document()
doc1.add(Field("field", "one", TextField.TYPE_STORED))
doc2.add(Field("field", "two", TextField.TYPE_STORED))
doc3.add(Field("field", "three", TextField.TYPE_STORED))
doc4.add(Field("field", "one", TextField.TYPE_STORED))
writer.addDocument(doc1)
writer.addDocument(doc2)
writer.addDocument(doc3)
writer.addDocument(doc4)
writer.commit()
writer.close()
self.testData = [('one',2), ('two',1), ('three', 1), ('five', 0)] * 500
self.lock = threading.Lock()
self.totalQueries = 0
def testWithMainThread(self):
""" warm up test for runSearch in main thread """
self.runSearch(2000, True)
def testWithPyLuceneThread(self):
""" Run 5 threads with 2000 queries each """
threads = []
for i in xrange(5):
threads.append(threading.Thread(target=self.runSearch,
args=(2000,)))
for thread in threads:
thread.start()
for thread in threads:
thread.join()
# we survived!
# and all queries ran successfully
self.assertEqual(10000, self.totalQueries)
def runSearch(self, runCount, mainThread=False):
""" search for runCount number of times """
# problem: if there are any assertion errors in the child
# thread, the calling thread is not notified and may still
# consider the test case passed. We are using self.totalQueries
# to double check that work has actually been done.
if not mainThread:
getVMEnv().attachCurrentThread()
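# threads created in Python must attach to the JVM before calling into Java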
time.sleep(0.5)
searcher = self.getSearcher()
try:
self.query = PhraseQuery()
for word, count in self.testData[0:runCount]:
query = TermQuery(Term("field", word))
topDocs = searcher.search(query, 50)
self.assertEqual(topDocs.totalHits, count)
self.lock.acquire()
self.totalQueries += 1
self.lock.release()
finally:
del searcher
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_PythonDirectory.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
import os, shutil
import test_PyLucene
from binascii import crc32
from threading import RLock
from lucene import JavaError, JArray
from java.lang import String
from java.io import IOException
from org.apache.pylucene.store import \
PythonLock, PythonLockFactory, \
PythonIndexInput, PythonIndexOutput, PythonDirectory
"""
The Directory Implementation here is for testing purposes only, not meant
as an example of writing one, the implementation here suffers from a lack
of safety when dealing with concurrent modifications as it does away with
the file locking in the default lucene fsdirectory implementation.
"""
DEBUG = False
class DebugWrapper(object):
def __init__(self, obj):
self.obj = obj
def __getattr__(self, name):
print self.obj.__class__.__name__, self.obj.name, name
sys.stdout.flush()
return getattr(self.obj, name)
class DebugFactory(object):
def __init__(self, klass):
self.klass = klass
def __call__(self, *args, **kw):
instance = self.klass(*args, **kw)
return DebugWrapper(instance)
class PythonDirLock(PythonLock):
# only safe for a single process
def __init__(self, name, path, lock):
super(PythonDirLock, self).__init__()
self.name = name
self.lock_file = path
self.lock = lock
def isLocked(self):
return self.lock.locked()
def obtain(self):
return self.lock.acquire()
def release(self):
return self.lock.release()
def close(self):
if hasattr(self.lock, 'close'):
self.lock.close()
class PythonDirLockFactory(PythonLockFactory):
def __init__(self, path):
super(PythonDirLockFactory, self).__init__()
self.path = path
self._locks = {}
def makeLock(self, name):
lock = self._locks.get(name)
if lock is None:
lock = PythonDirLock(name, os.path.join(self.path, name), RLock())
self._locks[name] = lock
return lock
def clearLock(self, name):
lock = self._locks.pop(name, None)
if lock is not None:
lock.release()
class PythonFileStreamInput(PythonIndexInput):
def __init__(self, name, fh, size, clone=False):
if not clone:
super(PythonFileStreamInput, self).__init__(name, size)
self.name = name
self.fh = fh
self._length = size
self.isOpen = True
self.isClone = clone
def length(self):
return long(self._length)
def clone(self):
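# clones share the underlying file handle; close() only closes it
# on the original (where isClone is False)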
clone = PythonFileStreamInput(self.name, self.fh, self._length, True)
return super(PythonFileStreamInput, self).clone(clone)
def close(self):
if self.isOpen:
self.isOpen = False
if not self.isClone:
self.fh.close()
def readInternal(self, length, pos):
self.fh.seek(pos)
return JArray('byte')(self.fh.read(length))
def seekInternal(self, pos):
self.fh.seek(pos)
class PythonFileStreamOutput(PythonIndexOutput):
def __init__(self, name, fh):
super(PythonFileStreamOutput, self).__init__()
self.name = name
self.fh = fh
self.isOpen = True
self._length = 0
self.crc = None
def close(self):
if self.isOpen:
self.isOpen = False
self.fh.flush()
self.fh.close()
def getFilePointer(self):
return long(self._length)
def getChecksum(self):
return long(self.crc & 0xffffffff)
def writeByte(self, b):
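# Java bytes are signed (-128..127); map negative values back
# into 0..255 before calling chr()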
if b < 0:
data = chr(b + 256)
else:
data = chr(b)
self.fh.write(data)
self._length += 1
if self.crc is None:
self.crc = crc32(data)
else:
self.crc = crc32(data, self.crc)
def writeBytes(self, bytes):
data = bytes.string_
self.fh.write(data)
self.fh.flush()
self._length += len(data)
if self.crc is None:
self.crc = crc32(data)
else:
self.crc = crc32(data, self.crc)
class PythonFileDirectory(PythonDirectory):
def __init__(self, path):
super(PythonFileDirectory, self).__init__()
self._lockFactory = PythonDirLockFactory(path)
self.name = path
assert os.path.isdir(path)
self.path = path
self._streams = []
def close(self):
for stream in self._streams:
stream.close()
del self._streams[:]
def createOutput(self, name, context):
file_path = os.path.join(self.path, name)
fh = open(file_path, "wb")
stream = PythonFileStreamOutput(name, fh)
self._streams.append(stream)
return stream
def deleteFile(self, name):
if self.fileExists(name):
os.unlink(os.path.join(self.path, name))
def fileExists(self, name):
return os.path.exists(os.path.join(self.path, name))
def fileLength(self, name):
file_path = os.path.join(self.path, name)
return long(os.path.getsize(file_path))
def fileModified(self, name):
file_path = os.path.join(self.path, name)
return os.path.getmtime(file_path)
def listAll(self):
return os.listdir(self.path)
def sync(self, name):
pass
def openInput(self, name, bufferSize=0):
file_path = os.path.join(self.path, name)
try:
fh = open(file_path, "rb")
except IOError:
raise JavaError, IOException(name)
stream = PythonFileStreamInput(name, fh, os.path.getsize(file_path))
self._streams.append(stream)
return stream
def touchFile(self, name):
file_path = os.path.join(self.path, name)
os.utime(file_path, None)
def setLockFactory(self, lockFactory):
pass
def getLockFactory(self):
return None
def clearLock(self, name):
self._lockFactory.clearLock(name)
def makeLock(self, name):
return self._lockFactory.makeLock(name)
if DEBUG:
_globals = globals()
_globals['PythonFileDirectory'] = DebugFactory(PythonFileDirectory)
_globals['PythonFileStreamInput'] = DebugFactory(PythonFileStreamInput)
_globals['PythonFileStreamOutput'] = DebugFactory(PythonFileStreamOutput)
_globals['PythonDirLock'] = DebugFactory(PythonDirLock)
del _globals
class PythonDirectoryTests(unittest.TestCase, test_PyLucene.Test_PyLuceneBase):
STORE_DIR = "testpyrepo"
def setUp(self):
if not os.path.exists(self.STORE_DIR):
os.mkdir(self.STORE_DIR)
def tearDown(self):
if os.path.exists(self.STORE_DIR):
shutil.rmtree(self.STORE_DIR)
def openStore(self):
return PythonFileDirectory(self.STORE_DIR)
def closeStore(self, store, *args):
for arg in args:
if arg is not None:
arg.close()
store.close()
def test_IncrementalLoop(self):
print "Testing Indexing Incremental Looping"
for i in range(100):
print "indexing ", i
sys.stdout.flush()
self.test_indexDocument()
if __name__ == "__main__":
env = lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
print 'inputs', env._dumpRefs(True).get('class org.apache.pylucene.store.PythonIndexInput', 0)
print 'outputs', env._dumpRefs(True).get('class org.apache.pylucene.store.PythonIndexOutput', 0)
print 'locks', env._dumpRefs(True).get('class org.apache.pylucene.store.PythonLock', 0)
print 'dirs', env._dumpRefs(True).get('class org.apache.pylucene.store.PythonDirectory', 0)
else:
unittest.main()
pylucene-4.10.1-1/test/test_PythonException.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.util import Version
from org.apache.pylucene.queryparser.classic import PythonQueryParser
class PythonExceptionTestCase(PyLuceneTestCase):
def testThroughLayerException(self):
class TestException(Exception):
pass
class TestQueryParser(PythonQueryParser):
def getFieldQuery_quoted(_self, field, queryText, quoted):
raise TestException("TestException")
qp = TestQueryParser(Version.LUCENE_CURRENT, 'all',
StandardAnalyzer(Version.LUCENE_CURRENT))
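# with a shared JCC runtime the original Python exception propagates back
# through the Java call stack; otherwise it surfaces wrapped in lucene.JavaError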
if lucene.getVMEnv().isShared():
with self.assertRaises(TestException):
qp.parse("foo bar")
else:
with self.assertRaises(lucene.JavaError):
qp.parse("foo bar")
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_PythonQueryParser.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import Term
from org.apache.lucene.search import BooleanClause, TermQuery
from org.apache.lucene.util import Version
from org.apache.pylucene.queryparser.classic import \
PythonQueryParser, PythonMultiFieldQueryParser
class BooleanTestMixin(object):
def getBooleanQuery(self, clauses, disableCoord):
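# called back from Java when the parser assembles a boolean query;
# appending an extra SHOULD clause proves the Python override is used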
extra_query = TermQuery(Term("all", "extra_clause"))
extra_clause = BooleanClause(extra_query, BooleanClause.Occur.SHOULD)
clauses.add(extra_clause)
return super(BooleanTestMixin, self).getBooleanQuery(clauses,
disableCoord)
class PythonQueryParserTestCase(PyLuceneTestCase):
def testOverrideBooleanQuery(self):
class TestQueryParser(BooleanTestMixin, PythonQueryParser):
def getFieldQuery_quoted(_self, field, queryText, quoted):
return super(TestQueryParser, _self).getFieldQuery_quoted_super(field, queryText, quoted)
qp = TestQueryParser(Version.LUCENE_CURRENT, 'all',
StandardAnalyzer(Version.LUCENE_CURRENT))
q = qp.parse("foo bar")
self.assertEquals(str(q), "all:foo all:bar all:extra_clause")
class PythonMultiFieldQueryParserTestCase(PyLuceneTestCase):
def testOverrideBooleanQuery(self):
class TestQueryParser(BooleanTestMixin, PythonMultiFieldQueryParser):
def getFieldQuery_quoted(_self, field, queryText, quoted):
return super(TestQueryParser, _self).getFieldQuery_quoted_super(field, queryText, quoted)
qp = TestQueryParser(Version.LUCENE_CURRENT, ['one', 'two'],
StandardAnalyzer(Version.LUCENE_CURRENT))
q = qp.parse(Version.LUCENE_CURRENT, "foo bar", ['one', 'two'],
[BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD],
StandardAnalyzer(Version.LUCENE_CURRENT))
self.assertEquals(str(q), "(one:foo one:bar) (two:foo two:bar)")
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_RegexQuery.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import Term
from org.apache.lucene.sandbox.queries.regex import RegexQuery
from org.apache.lucene.search.spans import \
SpanMultiTermQueryWrapper, SpanNearQuery
class TestRegexQuery(PyLuceneTestCase):
FN = "field"
def setUp(self):
super(TestRegexQuery, self).setUp()
writer = self.getWriter(analyzer=SimpleAnalyzer(self.TEST_VERSION))
doc = Document()
doc.add(Field(self.FN, "the quick brown fox jumps over the lazy dog", TextField.TYPE_NOT_STORED))
writer.addDocument(doc)
writer.commit()
writer.close()
self.searcher = self.getSearcher()
def tearDown(self):
del self.searcher
super(TestRegexQuery, self).tearDown()
def newTerm(self, value):
return Term(self.FN, value)
def regexQueryNrHits(self, regex):
query = RegexQuery(self.newTerm(regex))
return self.searcher.search(query, 50).totalHits
def spanRegexQueryNrHits(self, regex1, regex2, slop, ordered):
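# SpanMultiTermQueryWrapper lets a multi-term query such as RegexQuery
# participate in span (proximity) queries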
srq1 = SpanMultiTermQueryWrapper(RegexQuery(self.newTerm(regex1)))
srq2 = SpanMultiTermQueryWrapper(RegexQuery(self.newTerm(regex2)))
query = SpanNearQuery([srq1, srq2], slop, ordered)
return self.searcher.search(query, 50).totalHits
def testRegex1(self):
self.assertEqual(1, self.regexQueryNrHits("^q.[aeiou]c.*$"))
def testRegex2(self):
self.assertEqual(0, self.regexQueryNrHits("^.[aeiou]c.*$"))
def testRegex3(self):
self.assertEqual(0, self.regexQueryNrHits("^q.[aeiou]c$"))
def testSpanRegex1(self):
self.assertEqual(1, self.spanRegexQueryNrHits("^q.[aeiou]c.*$",
"dog", 6, True))
def testSpanRegex2(self):
self.assertEqual(0, self.spanRegexQueryNrHits("^q.[aeiou]c.*$",
"dog", 5, True))
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_RewriteQuery.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
# Originally intended to demonstrate a memory leak. See
# http://lists.osafoundation.org/pipermail/pylucene-dev/2008-October/002937.html
# and followup
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import Term
from org.apache.lucene.search import TermQuery
from org.apache.lucene.util import Version
class QueryRewriteTest(PyLuceneTestCase):
def setUp(self):
super(QueryRewriteTest, self).setUp()
writer = self.getWriter(analyzer=StandardAnalyzer(Version.LUCENE_CURRENT))
writer.close()
self.reader = self.getReader()
self.term = Term('all', 'foo')
def testQuery(self):
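# TermQuery is already primitive, so rewrite() should return an equal
# query; looping this test was used to demonstrate the leak referenced above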
base_query = TermQuery(self.term)
new_query = base_query.rewrite(self.reader)
self.assertEquals(base_query, new_query)
if __name__ == "__main__":
env = lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_Similarity.py
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.index import Term
from org.apache.lucene.search import \
BooleanClause, BooleanQuery, Explanation, PhraseQuery, TermQuery
from org.apache.lucene.util import Version
from org.apache.pylucene.search import PythonCollector
from org.apache.pylucene.search.similarities import PythonDefaultSimilarity
class SimpleSimilarity(PythonDefaultSimilarity):
def queryNorm(self, sumOfSquaredWeights):
return 1.0
def coord(self, overlap, maxOverlap):
return 1.0
def lengthNorm(self, state):
return state.getBoost()
def tf(self, freq):
return freq
def sloppyFreq(self, distance):
return 2.0
def idf(self, docFreq, numDocs):
return 1.0
def idfExplain(self, collectionStats, termStats):
return Explanation(1.0, "inexplicable")
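# With every factor flattened to 1.0 and tf(freq) == freq, the score of a
# single-term query reduces to the term's raw frequency in the document,
# which is what the collectors in the test below assert.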
class SimilarityTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def testSimilarity(self):
writer = self.getWriter(analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT),
similarity=SimpleSimilarity())
d1 = Document()
d1.add(Field("field", "a c", TextField.TYPE_STORED))
d2 = Document()
d2.add(Field("field", "a b c", TextField.TYPE_STORED))
writer.addDocument(d1)
writer.addDocument(d2)
writer.commit()
writer.close()
searcher = self.getSearcher()
searcher.setSimilarity(SimpleSimilarity())
a = Term("field", "a")
b = Term("field", "b")
c = Term("field", "c")
class collector1(PythonCollector):
def collect(_self, doc, score):
self.assertEqual(1.0, score)
def setNextReader(_self, context):
pass
def acceptsDocsOutOfOrder(_self):
return True
searcher.search(TermQuery(b), collector1())
bq = BooleanQuery()
bq.add(TermQuery(a), BooleanClause.Occur.SHOULD)
bq.add(TermQuery(b), BooleanClause.Occur.SHOULD)
class collector2(PythonCollector):
def collect(_self, doc, score):
self.assertEqual(doc + _self.base + 1, score)
def setNextReader(_self, context):
_self.base = context.docBase
def acceptsDocsOutOfOrder(_self):
return True
searcher.search(bq, collector2())
pq = PhraseQuery()
pq.add(a)
pq.add(c)
class collector3(PythonCollector):
def collect(_self, doc, score):
self.assertEqual(1.0, score)
def setNextReader(_self, context):
pass
def acceptsDocsOutOfOrder(_self):
return True
searcher.search(pq, collector3())
pq.setSlop(2)
class collector4(PythonCollector):
def collect(_self, doc, score):
self.assertEqual(2.0, score)
def setNextReader(_self, context):
pass
def acceptsDocsOutOfOrder(_self):
return True
searcher.search(pq, collector4())
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_Sort.py 000644 000765 000000 00000140756 12162654000 017332 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest, math
from PyLuceneTestCase import PyLuceneTestCase
from itertools import izip
from random import randint
from java.lang import Byte, Double, Float, Integer, Long, Short
from java.util import BitSet
from java.util.concurrent import Executors, TimeUnit
from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.codecs import Codec
from org.apache.lucene.document import \
Document, Field, FieldType, StringField, StoredField, TextField, \
NumericDocValuesField, SortedDocValuesField, BinaryDocValuesField, \
FloatDocValuesField
from org.apache.lucene.index import \
FieldInfo, LogDocMergePolicy, MultiReader, Term
from org.apache.lucene.search import \
BooleanQuery, BooleanClause, FieldCache, IndexSearcher, MatchAllDocsQuery, Sort, \
SortField, TermQuery, TopFieldCollector
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.util import \
BytesRef, DocIdBitSet, FieldCacheSanityChecker, NamedThreadFactory, Version
from org.apache.pylucene.search import \
PythonIntParser, PythonFloatParser, PythonLongParser, PythonDoubleParser, \
PythonByteParser, PythonShortParser, \
PythonFieldComparator, PythonFieldComparatorSource, PythonFilter
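# number of random documents indexed by _getFullStrings() for the
# string sort stress test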
NUM_STRINGS = 750
class SortTestCase(PyLuceneTestCase):
"""
Unit tests for sorting code, ported from Java Lucene
"""
def __init__(self, *args, **kwds):
super(SortTestCase, self).__init__(*args, **kwds)
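        # lexicographic comparison: codec names sorting after "Lucene3x"
        # (Lucene40 and later) support doc values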
self.supportsDocValues = Codec.getDefault().getName() > "Lucene3x"
self.data = [
# tracer contents int float string custom i18n long double, short, byte, 'custom parser encoding'
[ "A", "x a", "5", "4f", "c", "A-3", "p\u00EAche", "10", "-4.0", "3", "126", "J" ], # A, x
[ "B", "y a", "5", "3.4028235E38", "i", "B-10", "HAT", "1000000000", "40.0", "24", "1", "I" ], # B, y
[ "C", "x a b c", "2147483647", "1.0", "j", "A-2", "p\u00E9ch\u00E9", "99999999","40.00002343", "125", "15", "H" ], # C, x
[ "D", "y a b c", "-1", "0.0f", "a", "C-0", "HUT", str(Long.MAX_VALUE), str(Double.MIN_VALUE), str(Short.MIN_VALUE), str(Byte.MIN_VALUE), "G" ], # D, y
[ "E", "x a b c d", "5", "2f", "h", "B-8", "peach", str(Long.MIN_VALUE), str(Double.MAX_VALUE), str(Short.MAX_VALUE), str(Byte.MAX_VALUE), "F" ], # E, x
[ "F", "y a b c d", "2", "3.14159f", "g", "B-1", "H\u00C5T", "-44", "343.034435444", "-3", "0", "E" ], # F, y
[ "G", "x a b c d", "3", "-1.0", "f", "C-100", "sin", "323254543543", "4.043544", "5", "100", "D" ], # G, x
[ "H", "y a b c d", "0", "1.4E-45", "e", "C-88", "H\u00D8T", "1023423423005","4.043545", "10", "-50", "C" ], # H, y
[ "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10", "s\u00EDn", "332422459999", "4.043546", "-340", "51", "B" ], # I, x
[ "J", "y a b c d e f", "4", ".5", "b", "C-7", "HOT", "34334543543", "4.0000220343", "300", "2", "A" ], # J, y
[ "W", "g", "1", None, None, None, None, None, None, None, None, None ],
[ "X", "g", "1", "0.1", None, None, None, None, None, None, None, None ],
[ "Y", "g", "1", "0.2", None, None, None, None, None, None, None, None ],
[ "Z", "f g", None, None, None, None, None, None, None, None, None, None ],
# Sort Missing first/last
[ "a", "m", None, None, None, None, None, None, None, None, None, None ],
[ "b", "m", "4", "4.0", "4", None, None, "4", "4", "4", "4", None ],
[ "c", "m", "5", "5.0", "5", None, None, "5", "5", "5", "5", None ],
[ "d", "m", None, None, None, None, None, None, None, None, None, None ],
]
def setUp(self):
super(SortTestCase, self).setUp()
self.dirs = []
self.dvStringSorted = self.getRandomBoolean()
# run the randomization at setup so that threads share it and we don't
# hit cache incompatibilities
self.notSorted = self.getRandomBoolean()
# If you index as sorted source you can still sort by value instead:
self.sortByValue = self.getRandomBoolean()
self.full = self._getFullIndex()
self.searchX = self._getXIndex()
self.searchY = self._getYIndex()
self.queryX = TermQuery(Term("contents", "x"))
self.queryY = TermQuery(Term("contents", "y"))
self.queryA = TermQuery(Term("contents", "a"))
self.queryE = TermQuery(Term("contents", "e"))
self.queryF = TermQuery(Term("contents", "f"))
self.queryG = TermQuery(Term("contents", "g"))
self.queryM = TermQuery(Term("contents", "m"))
self.sort = Sort()
def tearDown(self):
for directory in self.dirs:
directory.close()
super(SortTestCase, self).tearDown()
def _getIndex(self, even, odd):
mergePolicy = LogDocMergePolicy()
mergePolicy.setMergeFactor(1000)
directory = RAMDirectory()
self.dirs.append(directory)
writer = self.getWriter(directory=directory,
analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT),
maxBufferedDocs=2, mergePolicy=mergePolicy)
if self.dvStringSorted:
# Index sorted
stringDVType = FieldInfo.DocValuesType.SORTED
elif self.notSorted:
# Index non-sorted
stringDVType = FieldInfo.DocValuesType.BINARY
else:
# sorted anyway
stringDVType = FieldInfo.DocValuesType.SORTED
ft1 = FieldType()
ft1.setStored(True)
ft2 = FieldType()
ft2.setIndexed(True)
for i in xrange(len(self.data)):
if (i % 2 == 0 and even) or (i % 2 == 1 and odd):
doc = Document()
doc.add(Field("tracer", self.data[i][0], ft1))
doc.add(TextField("contents", self.data[i][1], Field.Store.NO))
if self.data[i][2] is not None:
doc.add(StringField("int", self.data[i][2], Field.Store.NO))
if self.supportsDocValues:
doc.add(NumericDocValuesField("int_dv", Long.parseLong(self.data[i][2])))
if self.data[i][3] is not None:
doc.add(StringField("float", self.data[i][3], Field.Store.NO))
if self.supportsDocValues:
doc.add(FloatDocValuesField("float_dv", Float.parseFloat(self.data[i][3])))
if self.data[i][4] is not None:
doc.add(StringField("string", self.data[i][4], Field.Store.NO))
if self.supportsDocValues:
if stringDVType == FieldInfo.DocValuesType.SORTED:
doc.add(SortedDocValuesField("string_dv", BytesRef(self.data[i][4])))
elif stringDVType == FieldInfo.DocValuesType.BINARY:
doc.add(BinaryDocValuesField("string_dv", BytesRef(self.data[i][4])))
else:
raise ValueError("unknown type " + stringDVType)
if self.data[i][5] is not None:
doc.add(StringField("custom", self.data[i][5], Field.Store.NO))
if self.data[i][6] is not None:
doc.add(StringField("i18n", self.data[i][6], Field.Store.NO))
if self.data[i][7] is not None:
doc.add(StringField("long", self.data[i][7], Field.Store.NO))
if self.data[i][8] is not None:
doc.add(StringField("double", self.data[i][8], Field.Store.NO))
if self.supportsDocValues:
doc.add(NumericDocValuesField("double_dv", Double.doubleToRawLongBits(Double.parseDouble(self.data[i][8]))))
if self.data[i][9] is not None:
doc.add(StringField("short", self.data[i][9], Field.Store.NO))
if self.data[i][10] is not None:
doc.add(StringField("byte", self.data[i][10], Field.Store.NO))
if self.data[i][11] is not None:
doc.add(StringField("parser", self.data[i][11], Field.Store.NO))
for f in doc.getFields():
if f.fieldType().indexed() and not f.fieldType().omitNorms():
Field.cast_(f).setBoost(2.0)
writer.addDocument(doc)
reader = writer.getReader()
writer.close()
return self.getSearcher(reader=reader)
def _getFullIndex(self):
return self._getIndex(True, True)
def _getFullStrings(self):
mergePolicy = LogDocMergePolicy()
mergePolicy.setMergeFactor(97)
directory = RAMDirectory()
self.dirs.append(directory)
writer = self.getWriter(directory=directory,
analyzer=SimpleAnalyzer(Version.LUCENE_CURRENT),
maxBufferedDocs=4, mergePolicy=mergePolicy)
onlyStored = FieldType()
onlyStored.setStored(True)
fixedLen = self.getRandomNumber(2, 8)
fixedLen2 = self.getRandomNumber(1, 4)
for i in xrange(NUM_STRINGS):
doc = Document()
num = self.getRandomCharString(self.getRandomNumber(2, 8), 48, 52)
doc.add(Field("tracer", num, onlyStored))
doc.add(StringField("string", num, Field.Store.NO))
if self.supportsDocValues:
if self.dvStringSorted:
doc.add(SortedDocValuesField("string_dv", BytesRef(num)))
else:
doc.add(BinaryDocValuesField("string_dv", BytesRef(num)))
num2 = self.getRandomCharString(self.getRandomNumber(1, 4), 48, 50)
doc.add(StringField("string2", num2, Field.Store.NO))
if self.supportsDocValues:
if self.dvStringSorted:
doc.add(SortedDocValuesField("string2_dv", BytesRef(num2)))
else:
doc.add(BinaryDocValuesField("string2_dv", BytesRef(num2)))
doc.add(Field("tracer2", num2, onlyStored))
for f2 in doc.getFields():
if f2.fieldType().indexed() and not f2.fieldType().omitNorms():
Field.cast_(f2).setBoost(2.0)
numFixed = self.getRandomCharString(fixedLen, 48, 52)
doc.add(Field("fixed_tracer", numFixed, onlyStored))
doc.add(StringField("string_fixed", numFixed, Field.Store.NO))
if self.supportsDocValues:
if self.dvStringSorted:
doc.add(SortedDocValuesField("string_fixed_dv", BytesRef(numFixed)))
else:
doc.add(BinaryDocValuesField("string_fixed_dv", BytesRef(numFixed)))
num2Fixed = self.getRandomCharString(fixedLen2, 48, 52)
doc.add(StringField("string2_fixed", num2Fixed, Field.Store.NO))
if self.supportsDocValues:
if self.dvStringSorted:
doc.add(SortedDocValuesField("string2_fixed_dv", BytesRef(num2Fixed)))
else:
doc.add(BinaryDocValuesField("string2_fixed_dv", BytesRef(num2Fixed)))
doc.add(Field("tracer2_fixed", num2Fixed, onlyStored))
for f2 in doc.getFields():
if f2.fieldType().indexed() and not f2.fieldType().omitNorms():
Field.cast_(f2).setBoost(2.0)
writer.addDocument(doc)
writer.close()
return self.getSearcher(directory=directory)
    def getRandomNumberString(self, num, low, high):
        return ''.join([str(self.getRandomNumber(low, high))
                        for i in xrange(num)])
    def getRandomCharString(self, num, start=48, end=122):
        # defaults cover chr(48)-chr(122), i.e. '0' through 'z'
        return ''.join([chr(self.getRandomNumber(start, end))
                        for i in xrange(num)])
def getRandomNumber(self, low, high):
return randint(low, high)
def getRandomBoolean(self):
return randint(0, 1) == 1
def _getXIndex(self):
return self._getIndex(True, False)
def _getYIndex(self):
return self._getIndex(False, True)
def _getEmptyIndex(self):
return self._getIndex(False, False)
def testBuiltInSorts(self):
"""
test the sorts by score and document number
"""
sort = self.sort
self._assertMatches(self.full, self.queryX, sort, "ACEGI")
self._assertMatches(self.full, self.queryY, sort, "BDFHJ")
sort.setSort(SortField.FIELD_DOC)
self._assertMatches(self.full, self.queryX, sort, "ACEGI")
self._assertMatches(self.full, self.queryY, sort, "BDFHJ")
def testTypedSort(self):
"""
test sorts where the type of field is specified
"""
sort = self.sort
sort.setSort([SortField("int", SortField.Type.INT), SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "IGAEC")
self._assertMatches(self.full, self.queryY, sort, "DHFJB")
sort.setSort([SortField("float", SortField.Type.FLOAT),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "GCIEA")
self._assertMatches(self.full, self.queryY, sort, "DHJFB")
sort.setSort([SortField("long", SortField.Type.LONG),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "EACGI")
self._assertMatches(self.full, self.queryY, sort, "FBJHD")
sort.setSort([SortField("double", SortField.Type.DOUBLE),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "AGICE")
self._assertMatches(self.full, self.queryY, sort, "DJHBF")
sort.setSort([SortField("byte", SortField.Type.BYTE),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "CIGAE")
self._assertMatches(self.full, self.queryY, sort, "DHFBJ")
sort.setSort([SortField("short", SortField.Type.SHORT),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "IAGCE")
self._assertMatches(self.full, self.queryY, sort, "DFHBJ")
sort.setSort([SortField("string", SortField.Type.STRING),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "AIGEC")
self._assertMatches(self.full, self.queryY, sort, "DJHFB")
if self.supportsDocValues:
sort.setSort([SortField("int_dv", SortField.Type.INT),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "IGAEC")
self._assertMatches(self.full, self.queryY, sort, "DHFJB")
sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "GCIEA")
self._assertMatches(self.full, self.queryY, sort, "DHJFB")
sort.setSort([SortField("double_dv", SortField.Type.DOUBLE),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "AGICE")
self._assertMatches(self.full, self.queryY, sort, "DJHBF")
sort.setSort([SortField("string_dv", self._getDVStringSortType()),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "AIGEC")
self._assertMatches(self.full, self.queryY, sort, "DJHFB")
def _getDVStringSortType(self, allowSorted=True):
if self.dvStringSorted and allowSorted:
if self.sortByValue:
return SortField.Type.STRING_VAL
else:
return SortField.Type.STRING
else:
return SortField.Type.STRING_VAL
def _verifyStringSort(self, sort):
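        # search with the given sort, then verify: the primary tracer field
        # ascends, ties sort in reverse on the secondary tracer, and any
        # remaining ties ascend by docID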
searcher = self._getFullStrings()
result = searcher.search(MatchAllDocsQuery(), None,
self.getRandomNumber(500, searcher.getIndexReader().maxDoc()),
sort).scoreDocs
buff = []
n = len(result)
last = None
lastSub = None
lastDocId = 0
fail = False
if "_fixed" in sort.getSort()[0].getField():
fieldSuffix = "_fixed"
else:
fieldSuffix = ""
for scoreDoc in result:
doc2 = searcher.doc(scoreDoc.doc)
v = doc2.getValues("tracer" + fieldSuffix)
v2 = doc2.getValues("tracer2" + fieldSuffix)
for _v, _v2 in izip(v, v2):
buff.append(_v + "(" + _v2 + ")(" + str(scoreDoc.doc) + ")\n")
if last is not None:
_cmp = cmp(_v, last)
if _cmp < 0: # ensure first field is in order
fail = True
print "fail:", _v, "<", last
buff.append(" WRONG tracer\n")
if _cmp == 0: # ensure second field is in reverse order
_cmp = cmp(_v2, lastSub)
if _cmp > 0:
fail = True
print "rev field fail:", _v2, ">", lastSub
buff.append(" WRONG tracer2\n")
elif _cmp == 0: # ensure docid is in order
if scoreDoc.doc < lastDocId:
fail = True
print "doc fail:", scoreDoc.doc, ">", lastDocId
buff.append(" WRONG docID\n")
last = _v
lastSub = _v2
lastDocId = scoreDoc.doc
if fail:
print "topn field1(field2)(docID):", ''.join(buff)
self.assert_(not fail, "Found sort results out of order")
searcher.getIndexReader().close()
def testStringSort(self):
"""
Test String sorting: small queue to many matches, multi field sort,
reverse sort
"""
sort = self.sort
# Normal string field, var length
sort.setSort([SortField("string", SortField.Type.STRING),
SortField("string2", SortField.Type.STRING, True),
SortField.FIELD_DOC])
self._verifyStringSort(sort)
# Normal string field, fixed length
sort.setSort([SortField("string_fixed", SortField.Type.STRING),
SortField("string2_fixed", SortField.Type.STRING, True),
SortField.FIELD_DOC])
self._verifyStringSort(sort)
# Doc values field, var length
self.assertTrue(self.supportsDocValues, "cannot work with preflex codec")
sort.setSort([SortField("string_dv", self._getDVStringSortType()),
SortField("string2_dv", self._getDVStringSortType(), True),
SortField.FIELD_DOC])
self._verifyStringSort(sort)
# Doc values field, fixed length
sort.setSort([SortField("string_fixed_dv", self._getDVStringSortType()),
SortField("string2_fixed_dv", self._getDVStringSortType(), True),
SortField.FIELD_DOC])
self._verifyStringSort(sort)
def testCustomFieldParserSort(self):
"""
test sorts where the type of field is specified and a custom field
parser is used, that uses a simple char encoding. The sorted string
contains a character beginning from 'A' that is mapped to a numeric
value using some "funny" algorithm to be different for each data
type.
"""
# since tests explicitly use different parsers on the same field name
# we explicitly check/purge the FieldCache between each assertMatch
fc = FieldCache.DEFAULT
class intParser(PythonIntParser):
def parseInt(_self, val):
return (val.bytes[val.offset] - ord('A')) * 123456
def termsEnum(_self, terms):
return terms.iterator(None)
class floatParser(PythonFloatParser):
def parseFloat(_self, val):
return math.sqrt(val.bytes[val.offset])
def termsEnum(_self, terms):
return terms.iterator(None)
class longParser(PythonLongParser):
def parseLong(_self, val):
return (val.bytes[val.offset] - ord('A')) * 1234567890L
def termsEnum(_self, terms):
return terms.iterator(None)
class doubleParser(PythonDoubleParser):
def parseDouble(_self, val):
return math.pow(val.bytes[val.offset], val.bytes[val.offset] - ord('A'))
def termsEnum(_self, terms):
return terms.iterator(None)
class byteParser(PythonByteParser):
def parseByte(_self, val):
return chr(val.bytes[val.offset] - ord('A'))
def termsEnum(_self, terms):
return terms.iterator(None)
class shortParser(PythonShortParser):
def parseShort(_self, val):
return val.bytes[val.offset] - ord('A')
def termsEnum(_self, terms):
return terms.iterator(None)
sort = self.sort
sort.setSort([SortField("parser", intParser()),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
self._assertSaneFieldCaches(self.getName() + " IntParser")
fc.purgeAllCaches()
sort.setSort([SortField("parser", floatParser()),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
self._assertSaneFieldCaches(self.getName() + " FloatParser")
fc.purgeAllCaches()
sort.setSort([SortField("parser", longParser()),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
self._assertSaneFieldCaches(self.getName() + " LongParser")
fc.purgeAllCaches()
sort.setSort([SortField("parser", doubleParser()),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
self._assertSaneFieldCaches(self.getName() + " DoubleParser")
fc.purgeAllCaches()
sort.setSort([SortField("parser", byteParser()),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
self._assertSaneFieldCaches(self.getName() + " ByteParser")
fc.purgeAllCaches()
sort.setSort([SortField("parser", shortParser()),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
self._assertSaneFieldCaches(self.getName() + " ShortParser")
fc.purgeAllCaches()
def testEmptyIndex(self):
"""
test sorts when there's nothing in the index
"""
sort = self.sort
empty = self._getEmptyIndex()
self._assertMatches(empty, self.queryX, sort, "")
sort.setSort(SortField.FIELD_DOC)
self._assertMatches(empty, self.queryX, sort, "")
sort.setSort([SortField("int", SortField.Type.INT),
SortField.FIELD_DOC])
self._assertMatches(empty, self.queryX, sort, "")
sort.setSort([SortField("int_dv", SortField.Type.INT),
SortField.FIELD_DOC])
self._assertMatches(empty, self.queryX, sort, "")
sort.setSort([SortField("string", SortField.Type.STRING, True),
SortField.FIELD_DOC])
self._assertMatches(empty, self.queryX, sort, "")
sort.setSort([SortField("float", SortField.Type.FLOAT),
SortField("string", SortField.Type.STRING)])
self._assertMatches(empty, self.queryX, sort, "")
sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
SortField("string", SortField.Type.STRING)])
self._assertMatches(empty, self.queryX, sort, "")
sort.setSort([SortField("string_dv", self._getDVStringSortType(False),
True),
SortField.FIELD_DOC])
self._assertMatches(empty, self.queryX, sort, "")
sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
SortField("string_dv", self._getDVStringSortType(False))])
self._assertMatches(empty, self.queryX, sort, "")
sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
SortField("string_dv", self._getDVStringSortType(False))])
self._assertMatches(empty, self.queryX, sort, "")
def testNewCustomFieldParserSort(self):
"""
Test sorting w/ custom FieldComparator
"""
sort = self.sort
sort.setSort([SortField("parser", MyFieldComparatorSource())])
self._assertMatches(self.full, self.queryA, sort, "JIHGFEDCBA")
def testReverseSort(self):
"""
test sorts in reverse
"""
sort = self.sort
sort.setSort([SortField(None, SortField.Type.SCORE, True),
SortField.FIELD_DOC])
self._assertMatches(self.full, self.queryX, sort, "IEGCA")
self._assertMatches(self.full, self.queryY, sort, "JFHDB")
sort.setSort(SortField(None, SortField.Type.DOC, True))
self._assertMatches(self.full, self.queryX, sort, "IGECA")
self._assertMatches(self.full, self.queryY, sort, "JHFDB")
sort.setSort(SortField("int", SortField.Type.INT, True))
self._assertMatches(self.full, self.queryX, sort, "CAEGI")
self._assertMatches(self.full, self.queryY, sort, "BJFHD")
sort.setSort(SortField("float", SortField.Type.FLOAT, True))
self._assertMatches(self.full, self.queryX, sort, "AECIG")
self._assertMatches(self.full, self.queryY, sort, "BFJHD")
sort.setSort(SortField("string", SortField.Type.STRING, True))
self._assertMatches(self.full, self.queryX, sort, "CEGIA")
self._assertMatches(self.full, self.queryY, sort, "BFHJD")
if self.supportsDocValues:
sort.setSort(SortField("int_dv", SortField.Type.INT, True))
self._assertMatches(self.full, self.queryX, sort, "CAEGI")
self._assertMatches(self.full, self.queryY, sort, "BJFHD")
sort.setSort(SortField("float_dv", SortField.Type.FLOAT, True))
self._assertMatches(self.full, self.queryX, sort, "AECIG")
self._assertMatches(self.full, self.queryY, sort, "BFJHD")
sort.setSort(SortField("string_dv", self._getDVStringSortType(), True))
self._assertMatches(self.full, self.queryX, sort, "CEGIA")
self._assertMatches(self.full, self.queryY, sort, "BFHJD")
def testEmptyFieldSort(self):
"""
        test sorting when the sort field is empty (undefined) for some of
        the documents
"""
sort = self.sort
sort.setSort(SortField("string", SortField.Type.STRING))
self._assertMatches(self.full, self.queryF, sort, "ZJI")
sort.setSort(SortField("string", SortField.Type.STRING, True))
self._assertMatches(self.full, self.queryF, sort, "IJZ")
sort.setSort(SortField("int", SortField.Type.INT))
self._assertMatches(self.full, self.queryF, sort, "IZJ")
sort.setSort(SortField("int", SortField.Type.INT, True))
self._assertMatches(self.full, self.queryF, sort, "JZI")
sort.setSort(SortField("float", SortField.Type.FLOAT))
self._assertMatches(self.full, self.queryF, sort, "ZJI")
        # using a nonexistent field as first sort key shouldn't make a
        # difference:
sort.setSort([SortField("nosuchfield", SortField.Type.STRING),
SortField("float", SortField.Type.FLOAT)])
self._assertMatches(self.full, self.queryF, sort, "ZJI")
sort.setSort(SortField("float", SortField.Type.FLOAT, True))
self._assertMatches(self.full, self.queryF, sort, "IJZ")
        # When a field is None for both documents, the next SortField should
        # be used.
sort.setSort([SortField("int", SortField.Type.INT),
SortField("string", SortField.Type.STRING),
SortField("float", SortField.Type.FLOAT)])
self._assertMatches(self.full, self.queryG, sort, "ZWXY")
        # Reverse the last criterion to make sure the test didn't pass by
        # chance
sort.setSort([SortField("int", SortField.Type.INT),
SortField("string", SortField.Type.STRING),
SortField("float", SortField.Type.FLOAT, True)])
self._assertMatches(self.full, self.queryG, sort, "ZYXW")
# Do the same for a ParallelMultiSearcher
threadPool = Executors.newFixedThreadPool(self.getRandomNumber(2, 8), NamedThreadFactory("testEmptyFieldSort"))
        parallelSearcher = IndexSearcher(self.full.getIndexReader(), threadPool)
sort.setSort([SortField("int", SortField.Type.INT),
SortField("string", SortField.Type.STRING),
SortField("float", SortField.Type.FLOAT)])
self._assertMatches(parallelSearcher, self.queryG, sort, "ZWXY")
sort.setSort([SortField("int", SortField.Type.INT),
SortField("string", SortField.Type.STRING),
SortField("float", SortField.Type.FLOAT, True)])
self._assertMatches(parallelSearcher, self.queryG, sort, "ZYXW")
threadPool.shutdown()
threadPool.awaitTermination(1000L, TimeUnit.MILLISECONDS)
def testSortCombos(self):
"""
test sorts using a series of fields
"""
sort = self.sort
sort.setSort([SortField("int", SortField.Type.INT),
SortField("float", SortField.Type.FLOAT)])
self._assertMatches(self.full, self.queryX, sort, "IGEAC")
sort.setSort([SortField("int", SortField.Type.INT, True),
SortField(None, SortField.Type.DOC, True)])
self._assertMatches(self.full, self.queryX, sort, "CEAGI")
sort.setSort([SortField("float", SortField.Type.FLOAT),
SortField("string", SortField.Type.STRING)])
self._assertMatches(self.full, self.queryX, sort, "GICEA")
if self.supportsDocValues:
sort.setSort([SortField("int_dv", SortField.Type.INT),
SortField("float_dv", SortField.Type.FLOAT)])
self._assertMatches(self.full, self.queryX, sort, "IGEAC")
sort.setSort([SortField("int_dv", SortField.Type.INT, True),
SortField(None, SortField.Type.DOC, True)])
self._assertMatches(self.full, self.queryX, sort, "CEAGI")
sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
SortField("string_dv", self._getDVStringSortType())])
self._assertMatches(self.full, self.queryX, sort, "GICEA")
def testParallelMultiSort(self):
"""
test a variety of sorts using a parallel multisearcher
"""
threadPool = Executors.newFixedThreadPool(self.getRandomNumber(2, 8), NamedThreadFactory("testParallelMultiSort"))
searcher = IndexSearcher(MultiReader([self.searchX.getIndexReader(),
self.searchY.getIndexReader()]),
threadPool)
self._runMultiSorts(searcher, False)
        threadPool.shutdown()
        threadPool.awaitTermination(1000L, TimeUnit.MILLISECONDS)
def testTopDocsScores(self):
"""
There was previously a bug in FieldSortedHitQueue.maxscore when only
a single doc was added. That is what the following tests for.
"""
sort = Sort()
nDocs = 10
# try to pick a query that will result in an unnormalized
# score greater than 1 to test for correct normalization
docs1 = self.full.search(self.queryE, None, nDocs, sort, True, True)
# a filter that only allows through the first hit
class filter(PythonFilter):
def getDocIdSet(_self, context, acceptDocs):
reader = context.reader()
bs = BitSet(reader.maxDoc())
bs.set(0, reader.maxDoc())
bs.set(docs1.scoreDocs[0].doc)
return DocIdBitSet(bs)
docs2 = self.full.search(self.queryE, filter(), nDocs, sort, True, True)
        self.assertAlmostEqual(docs1.scoreDocs[0].score,
                               docs2.scoreDocs[0].score, delta=1e-6)
def testSortWithoutFillFields(self):
"""
There was previously a bug in TopFieldCollector when fillFields was
        set to False - the same doc and score were set in the ScoreDoc[]
        array. This test asserts that if fillFields is False, the documents
        are set properly. It does not use Searcher's default search
        methods (with Sort) since those all set fillFields to True.
"""
sorts = [Sort(SortField.FIELD_DOC), Sort()]
for sort in sorts:
q = MatchAllDocsQuery()
tdc = TopFieldCollector.create(sort, 10, False,
False, False, True)
self.full.search(q, tdc)
sds = tdc.topDocs().scoreDocs
for i in xrange(1, len(sds)):
self.assert_(sds[i].doc != sds[i - 1].doc)
def testSortWithoutScoreTracking(self):
"""
Two Sort criteria to instantiate the multi/single comparators.
"""
sorts = [Sort(SortField.FIELD_DOC), Sort()]
for sort in sorts:
q = MatchAllDocsQuery()
tdc = TopFieldCollector.create(sort, 10, True, False,
False, True)
self.full.search(q, tdc)
tds = tdc.topDocs()
sds = tds.scoreDocs
for sd in sds:
self.assert_(Float.isNaN_(sd.score))
self.assert_(Float.isNaN_(tds.getMaxScore()))
def testSortWithScoreNoMaxScoreTracking(self):
"""
Two Sort criteria to instantiate the multi/single comparators.
"""
sorts = [Sort(SortField.FIELD_DOC), Sort()]
for sort in sorts:
q = MatchAllDocsQuery()
tdc = TopFieldCollector.create(sort, 10, True, True,
False, True)
self.full.search(q, tdc)
tds = tdc.topDocs()
sds = tds.scoreDocs
for sd in sds:
self.assert_(not Float.isNaN_(sd.score))
self.assert_(Float.isNaN_(tds.getMaxScore()))
def testSortWithScoreAndMaxScoreTracking(self):
"""
Two Sort criteria to instantiate the multi/single comparators.
"""
sorts = [Sort(SortField.FIELD_DOC), Sort()]
for sort in sorts:
q = MatchAllDocsQuery()
tdc = TopFieldCollector.create(sort, 10, True, True,
True, True)
self.full.search(q, tdc)
tds = tdc.topDocs()
sds = tds.scoreDocs
for sd in sds:
self.assert_(not Float.isNaN_(sd.score))
self.assert_(not Float.isNaN_(tds.getMaxScore()))
def testOutOfOrderDocsScoringSort(self):
"""
Two Sort criteria to instantiate the multi/single comparators.
"""
sorts = [Sort(SortField.FIELD_DOC), Sort()]
tfcOptions = [[False, False, False],
[False, False, True],
[False, True, False],
[False, True, True],
[True, False, False],
[True, False, True],
[True, True, False],
[True, True, True]]
actualTFCClasses = [
"OutOfOrderOneComparatorNonScoringCollector",
"OutOfOrderOneComparatorScoringMaxScoreCollector",
"OutOfOrderOneComparatorScoringNoMaxScoreCollector",
"OutOfOrderOneComparatorScoringMaxScoreCollector",
"OutOfOrderOneComparatorNonScoringCollector",
"OutOfOrderOneComparatorScoringMaxScoreCollector",
"OutOfOrderOneComparatorScoringNoMaxScoreCollector",
"OutOfOrderOneComparatorScoringMaxScoreCollector"
]
bq = BooleanQuery()
        # Add a Query with SHOULD, since BooleanWeight.scorer() returns
        # BooleanScorer2, which delegates to BooleanScorer if there are no
        # mandatory clauses.
bq.add(MatchAllDocsQuery(), BooleanClause.Occur.SHOULD)
# Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to
# return the clause instead of BQ.
bq.setMinimumNumberShouldMatch(1)
for sort in sorts:
for tfcOption, actualTFCClass in izip(tfcOptions,
actualTFCClasses):
tdc = TopFieldCollector.create(sort, 10, tfcOption[0],
tfcOption[1], tfcOption[2],
False)
self.assert_(tdc.getClass().getName().endswith("$" + actualTFCClass))
self.full.search(bq, tdc)
tds = tdc.topDocs()
sds = tds.scoreDocs
self.assertEqual(10, len(sds))
def testSortWithScoreAndMaxScoreTrackingNoResults(self):
"""
Two Sort criteria to instantiate the multi/single comparators.
"""
sorts = [Sort(SortField.FIELD_DOC), Sort()]
for sort in sorts:
tdc = TopFieldCollector.create(sort, 10, True, True, True, True)
tds = tdc.topDocs()
self.assertEqual(0, tds.totalHits)
self.assert_(Float.isNaN_(tds.getMaxScore()))
def _runMultiSorts(self, multi, isFull):
"""
runs a variety of sorts useful for multisearchers
"""
sort = self.sort
sort.setSort(SortField.FIELD_DOC)
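        # the full index returns documents in insertion order; the multi
        # index concatenates the X documents, then the Y documents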
expected = isFull and "ABCDEFGHIJ" or "ACEGIBDFHJ"
self._assertMatches(multi, self.queryA, sort, expected)
sort.setSort(SortField("int", SortField.Type.INT))
expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
self._assertMatches(multi, self.queryA, sort, expected)
sort.setSort([SortField("int", SortField.Type.INT), SortField.FIELD_DOC])
expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
self._assertMatches(multi, self.queryA, sort, expected)
sort.setSort(SortField("int", SortField.Type.INT))
expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
self._assertMatches(multi, self.queryA, sort, expected)
sort.setSort([SortField("float", SortField.Type.FLOAT), SortField.FIELD_DOC])
self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")
sort.setSort(SortField("float", SortField.Type.FLOAT))
self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")
sort.setSort(SortField("string", SortField.Type.STRING))
self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
sort.setSort(SortField("int", SortField.Type.INT, True))
expected = isFull and "CABEJGFHDI" or "CAEBJGFHDI"
self._assertMatches(multi, self.queryA, sort, expected)
sort.setSort(SortField("float", SortField.Type.FLOAT, True))
self._assertMatches(multi, self.queryA, sort, "BAFECIJHDG")
sort.setSort(SortField("string", SortField.Type.STRING, True))
self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")
sort.setSort([SortField("int", SortField.Type.INT),
SortField("float", SortField.Type.FLOAT)])
self._assertMatches(multi, self.queryA, sort, "IDHFGJEABC")
sort.setSort([SortField("float", SortField.Type.FLOAT),
SortField("string", SortField.Type.STRING)])
self._assertMatches(multi, self.queryA, sort, "GDHJICEFAB")
sort.setSort(SortField("int", SortField.Type.INT))
self._assertMatches(multi, self.queryF, sort, "IZJ")
sort.setSort(SortField("int", SortField.Type.INT, True))
self._assertMatches(multi, self.queryF, sort, "JZI")
sort.setSort(SortField("float", SortField.Type.FLOAT))
self._assertMatches(multi, self.queryF, sort, "ZJI")
sort.setSort(SortField("string", SortField.Type.STRING))
self._assertMatches(multi, self.queryF, sort, "ZJI")
sort.setSort(SortField("string", SortField.Type.STRING, True))
self._assertMatches(multi, self.queryF, sort, "IJZ")
if self.supportsDocValues:
sort.setSort(SortField("int_dv", SortField.Type.INT))
expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
self._assertMatches(multi, self.queryA, sort, expected)
sort.setSort([SortField("int_dv", SortField.Type.INT),
SortField.FIELD_DOC])
expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
self._assertMatches(multi, self.queryA, sort, expected)
sort.setSort(SortField("int_dv", SortField.Type.INT))
expected = isFull and "IDHFGJABEC" or "IDHFGJAEBC"
self._assertMatches(multi, self.queryA, sort, expected)
sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
SortField.FIELD_DOC])
self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")
sort.setSort(SortField("float_dv", SortField.Type.FLOAT))
self._assertMatches(multi, self.queryA, sort, "GDHJCIEFAB")
sort.setSort(SortField("int_dv", SortField.Type.INT, True))
expected = isFull and "CABEJGFHDI" or "CAEBJGFHDI"
self._assertMatches(multi, self.queryA, sort, expected)
sort.setSort([SortField("int_dv", SortField.Type.INT),
SortField("float_dv", SortField.Type.FLOAT)])
self._assertMatches(multi, self.queryA, sort, "IDHFGJEABC")
sort.setSort(SortField("int_dv", SortField.Type.INT))
self._assertMatches(multi, self.queryF, sort, "IZJ")
sort.setSort(SortField("int_dv", SortField.Type.INT, True))
self._assertMatches(multi, self.queryF, sort, "JZI")
sort.setSort(SortField("string_dv", self._getDVStringSortType()))
self._assertMatches(multi, self.queryA, sort, "DJAIHGFEBC")
sort.setSort(SortField("string_dv", self._getDVStringSortType(), True))
self._assertMatches(multi, self.queryA, sort, "CBEFGHIAJD")
sort.setSort([SortField("float_dv", SortField.Type.FLOAT),
SortField("string_dv", self._getDVStringSortType())])
self._assertMatches(multi, self.queryA, sort, "GDHJICEFAB")
sort.setSort(SortField("string_dv", self._getDVStringSortType()))
self._assertMatches(multi, self.queryF, sort, "ZJI")
sort.setSort(SortField("string_dv", self._getDVStringSortType(), True))
self._assertMatches(multi, self.queryF, sort, "IJZ")
        # up to this point, all of the searches should have "sane"
        # FieldCache behavior, and should have reused the cache in several
        # cases
self._assertSaneFieldCaches(self.getName() + " various")
FieldCache.DEFAULT.purgeAllCaches()
def _assertMatches(self, searcher, query, sort, expectedResult):
"""
make sure the documents returned by the search match the expected
list
"""
# ScoreDoc[] result = searcher.search(query, None, 1000, sort).scoreDocs
hits = searcher.search(query, None, len(expectedResult) or 1, sort)
sds = hits.scoreDocs
self.assertEqual(hits.totalHits, len(expectedResult))
buff = []
for sd in sds:
doc = searcher.doc(sd.doc)
v = doc.getValues("tracer")
for _v in v:
buff.append(_v)
self.assertEqual(expectedResult, ''.join(buff))
def getScores(self, hits, searcher):
scoreMap = {}
for hit in hits:
doc = searcher.doc(hit.doc)
v = doc.getValues("tracer")
self.assertEqual(len(v), 1)
scoreMap[v[0]] = hit.score
return scoreMap
def _assertSameValues(self, m1, m2):
"""
make sure all the values in the maps match
"""
        self.assertEqual(len(m1), len(m2))
        for key in m1.iterkeys():
            self.assertAlmostEqual(m1[key], m2[key], delta=1e-6)
def getName(self):
return type(self).__name__
def _assertSaneFieldCaches(self, msg):
entries = FieldCache.DEFAULT.getCacheEntries()
insanity = FieldCacheSanityChecker.checkSanity(entries)
if insanity:
print [x for x in insanity]
self.assertEqual(0, len(insanity),
msg + ": Insane FieldCache usage(s) found")
class MyFieldComparator(PythonFieldComparator):
def __init__(self, numHits):
super(MyFieldComparator, self).__init__()
self.slotValues = [0] * numHits
def copy(self, slot, doc):
self.slotValues[slot] = self.docValues.get(doc)
def compare(self, slot1, slot2):
return self.slotValues[slot1] - self.slotValues[slot2]
def compareBottom(self, doc):
return self.bottomValue - self.docValues.get(doc)
def setBottom(self, bottom):
self.bottomValue = self.slotValues[bottom]
def setNextReader(self, context):
class intParser(PythonIntParser):
def parseInt(_self, val):
return (val.bytes[val.offset] - ord('A')) * 123456
def termsEnum(_self, terms):
return terms.iterator(None)
self.docValues = FieldCache.DEFAULT.getInts(context.reader(), "parser",
intParser(), False)
return self
def value(self, slot):
return Integer(self.slotValues[slot])
def compareDocToValue(self, doc, valueObj):
value = valueObj.intValue()
docValue = self.docValues.get(doc)
# values are small enough that overflow won't happen
return docValue - value
class MyFieldComparatorSource(PythonFieldComparatorSource):
def newComparator(self, fieldname, numHits, sortPos, reversed):
# keep an extra ref since this object seems to be passed around
# back and forth without a reference being kept on the java side
self.saved = MyFieldComparator(numHits)
return self.saved
if __name__ == "__main__":
env = lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
# refs = sorted(env._dumpRefs(classes=True).items(),
# key=lambda x: x[1], reverse=True)
# print refs[0:4]
else:
unittest.main()
pylucene-4.10.1-1/test/test_StopAnalyzer.py 000644 000765 000000 00000007076 12162654000 021033 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from java.io import StringReader
from org.apache.lucene.analysis.core import StopAnalyzer, StopFilter
from org.apache.lucene.analysis.tokenattributes import \
CharTermAttribute, PositionIncrementAttribute
from org.apache.lucene.util import Version
class StopAnalyzerTestCase(unittest.TestCase):
"""
Unit tests ported from Java Lucene
"""
def setUp(self):
self.stop = StopAnalyzer(Version.LUCENE_CURRENT)
self.invalidTokens = StopAnalyzer.ENGLISH_STOP_WORDS_SET
def testDefaults(self):
self.assert_(self.stop is not None)
reader = StringReader("This is a test of the english stop analyzer")
stream = self.stop.tokenStream("test", reader)
self.assert_(stream is not None)
stream.reset()
termAtt = stream.getAttribute(CharTermAttribute.class_)
while stream.incrementToken():
self.assert_(termAtt.toString() not in self.invalidTokens)
def testStopList(self):
stopWords = ["good", "test", "analyzer"]
stopWordsSet = StopFilter.makeStopSet(Version.LUCENE_CURRENT,
stopWords)
newStop = StopAnalyzer(Version.LUCENE_40, stopWordsSet)
reader = StringReader("This is a good test of the english stop analyzer")
stream = newStop.tokenStream("test", reader)
self.assert_(stream is not None)
stream.reset()
termAtt = stream.getAttribute(CharTermAttribute.class_)
while stream.incrementToken():
text = termAtt.toString()
self.assert_(text not in stopWordsSet)
def testStopListPositions(self):
stopWords = ["good", "test", "analyzer"]
stopWordsSet = StopFilter.makeStopSet(Version.LUCENE_CURRENT,
stopWords)
newStop = StopAnalyzer(Version.LUCENE_CURRENT, stopWordsSet)
reader = StringReader("This is a good test of the english stop analyzer with positions")
expectedIncr = [ 1, 1, 1, 3, 1, 1, 1, 2, 1]
stream = newStop.tokenStream("test", reader)
self.assert_(stream is not None)
stream.reset()
i = 0
termAtt = stream.getAttribute(CharTermAttribute.class_)
posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class_)
while stream.incrementToken():
text = termAtt.toString()
self.assert_(text not in stopWordsSet)
self.assertEqual(expectedIncr[i],
posIncrAtt.getPositionIncrement())
i += 1
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_StopWords.py 000644 000765 000000 00000003461 12162654000 020336 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from java.io import StringReader
from org.apache.lucene.analysis.core import StopFilter
from org.apache.lucene.analysis.standard import StandardTokenizer
from org.apache.lucene.util import Version
# run with -loop to test fix for string local ref leak reported
# by Aaron Lav.
class StopWordsTestCase(unittest.TestCase):
def setUp(self):
stopWords = ['the', 'and', 's']
self.stop_set = StopFilter.makeStopSet(Version.LUCENE_CURRENT,
stopWords)
self.reader = StringReader('foo')
def testStopWords(self):
try:
result = StandardTokenizer(Version.LUCENE_CURRENT, self.reader)
result = StopFilter(Version.LUCENE_CURRENT, result, self.stop_set)
except Exception, e:
self.fail(str(e))
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_TermRangeFilter.py 000644 000765 000000 00000020112 12162654000 021414 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from BaseTestRangeFilter import BaseTestRangeFilter
from org.apache.lucene.analysis.core import SimpleAnalyzer
from org.apache.lucene.document import Document, Field, StringField
from org.apache.lucene.index import Term
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.search import TermQuery, TermRangeFilter
#
# A basic 'positive' Unit test class for the TermRangeFilter class.
#
# NOTE: at the moment, this class only tests for 'positive' results,
# it does not verify the results to ensure there are no 'false positives',
# nor does it adequately test 'negative' results. It also does not test
# that garbage input results in an Exception.
#
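# shorthand for TermRangeFilter.newStringRange(field, lowerTerm, upperTerm,
# includeLower, includeUpper)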
def _trf(*args):
return TermRangeFilter.newStringRange(*args)
class TestTermRangeFilter(BaseTestRangeFilter):
def testRangeFilterId(self):
index = self.signedIndex
        reader = self.getReader(directory=index.index)
search = self.getSearcher(reader=reader)
medId = ((self.maxId - self.minId) / 2)
minIP = self.pad(self.minId)
maxIP = self.pad(self.maxId)
medIP = self.pad(medId)
numDocs = reader.numDocs()
self.assertEqual(numDocs, 1 + self.maxId - self.minId, "num of docs")
q = TermQuery(Term("body","body"))
# test id, bounded on both ends
result = search.search(q, _trf("id", minIP, maxIP, True, True), 50)
self.assertEqual(numDocs, result.totalHits, "find all")
result = search.search(q, _trf("id", minIP, maxIP, True, False), 50)
self.assertEqual(numDocs - 1, result.totalHits, "all but last")
result = search.search(q, _trf("id", minIP, maxIP, False, True), 50)
self.assertEqual(numDocs - 1, result.totalHits, "all but first")
result = search.search(q, _trf("id", minIP, maxIP, False, False), 50)
self.assertEqual(numDocs - 2, result.totalHits, "all but ends")
result = search.search(q, _trf("id", medIP, maxIP, True, True), 50)
self.assertEqual(1 + self.maxId - medId, result.totalHits, "med and up")
result = search.search(q, _trf("id", minIP, medIP, True, True), 50)
self.assertEqual(1 + medId - self.minId, result.totalHits, "up to med")
# unbounded id
result = search.search(q, _trf("id", minIP, None, True, False), 50)
self.assertEqual(numDocs, result.totalHits, "min and up")
result = search.search(q, _trf("id", None, maxIP, False, True), 50)
self.assertEqual(numDocs, result.totalHits, "max and down")
result = search.search(q, _trf("id", minIP, None, False, False), 50)
self.assertEqual(numDocs - 1, result.totalHits, "not min, but up")
result = search.search(q, _trf("id", None, maxIP, False, False), 50)
self.assertEqual(numDocs - 1, result.totalHits, "not max, but down")
result = search.search(q, _trf("id",medIP, maxIP, True, False), 50)
self.assertEqual(self.maxId - medId, result.totalHits, "med and up, not max")
result = search.search(q, _trf("id", minIP, medIP, False, True), 50)
self.assertEqual(medId - self.minId, result.totalHits, "not min, up to med")
# very small sets
result = search.search(q, _trf("id", minIP, minIP, False, False), 50)
self.assertEqual(0, result.totalHits, "min, min, False, False")
result = search.search(q, _trf("id", medIP, medIP, False, False), 50)
self.assertEqual(0, result.totalHits, "med, med, False, False")
result = search.search(q, _trf("id", maxIP, maxIP, False, False), 50)
self.assertEqual(0, result.totalHits, "max, max, False, False")
result = search.search(q, _trf("id", minIP, minIP, True, True), 50)
self.assertEqual(1, result.totalHits, "min, min, True, True")
result = search.search(q, _trf("id", None, minIP, False, True), 50)
self.assertEqual(1, result.totalHits, "nul, min, False, True")
result = search.search(q, _trf("id", maxIP, maxIP, True, True), 50)
self.assertEqual(1, result.totalHits, "max, max, True, True")
result = search.search(q, _trf("id", maxIP, None, True, False), 50)
self.assertEqual(1, result.totalHits, "max, nul, True, True")
result = search.search(q, _trf("id", medIP, medIP, True, True), 50)
self.assertEqual(1, result.totalHits, "med, med, True, True")
def testRangeFilterRand(self):
index = self.signedIndex
reader = self.getReader(directory=index.index)
search = self.getSearcher(reader=reader)
minRP = self.pad(index.minR)
maxRP = self.pad(index.maxR)
numDocs = reader.numDocs()
self.assertEqual(numDocs, 1 + self.maxId - self.minId, "num of docs")
q = TermQuery(Term("body", "body"))
# test extremes, bounded on both ends
result = search.search(q, _trf("rand", minRP, maxRP, True, True), 50)
self.assertEqual(numDocs, result.totalHits, "find all")
result = search.search(q, _trf("rand", minRP, maxRP, True, False), 50)
self.assertEqual(numDocs - 1, result.totalHits, "all but biggest")
result = search.search(q, _trf("rand", minRP, maxRP, False, True), 50)
self.assertEqual(numDocs - 1, result.totalHits, "all but smallest")
result = search.search(q, _trf("rand", minRP, maxRP, False, False), 50)
self.assertEqual(numDocs - 2, result.totalHits, "all but extremes")
# unbounded
result = search.search(q, _trf("rand", minRP, None, True, False), 50)
self.assertEqual(numDocs, result.totalHits, "smallest and up")
result = search.search(q, _trf("rand", None, maxRP, False, True), 50)
self.assertEqual(numDocs, result.totalHits, "biggest and down")
result = search.search(q, _trf("rand", minRP, None, False, False), 50)
self.assertEqual(numDocs - 1, result.totalHits, "not smallest, but up")
result = search.search(q, _trf("rand", None, maxRP, False, False), 50)
self.assertEqual(numDocs - 1, result.totalHits, "not biggest, but down")
# very small sets
result = search.search(q, _trf("rand", minRP, minRP, False, False), 50)
self.assertEqual(0, result.totalHits, "min, min, False, False")
result = search.search(q, _trf("rand", maxRP, maxRP, False, False), 50)
self.assertEqual(0, result.totalHits, "max, max, False, False")
result = search.search(q, _trf("rand", minRP, minRP, True, True), 50)
self.assertEqual(1, result.totalHits, "min, min, True, True")
result = search.search(q, _trf("rand", None, minRP, False, True), 50)
self.assertEqual(1, result.totalHits, "nul, min, False, True")
result = search.search(q, _trf("rand", maxRP, maxRP, True, True), 50)
self.assertEqual(1, result.totalHits, "max, max, True, True")
result = search.search(q, _trf("rand", maxRP, None, True, False), 50)
self.assertEqual(1, result.totalHits, "max, nul, True, True")
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_TermRangeQuery.py 000644 000765 000000 00000007402 12162654000 021303 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from PyLuceneTestCase import PyLuceneTestCase
from org.apache.lucene.document import Document, Field, StringField, TextField
from org.apache.lucene.index import IndexWriterConfig
from org.apache.lucene.search import TermRangeQuery
class TermRangeQueryTestCase(PyLuceneTestCase):
"""
Unit tests ported from Java Lucene
"""
def setUp(self):
super(TermRangeQueryTestCase, self).setUp()
self.docCount = 0
def _initializeIndex(self, values):
writer = self.getWriter()
for value in values:
self._insertDoc(writer, value)
writer.close()
def _insertDoc(self, writer, content):
doc = Document()
doc.add(Field("id", "id" + str(self.docCount),
StringField.TYPE_STORED))
doc.add(Field("content", content,
TextField.TYPE_NOT_STORED))
writer.addDocument(doc)
self.docCount += 1
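    # appends a single document to the already-built index by reopening
    # the writer in APPEND mode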
def _addDoc(self, content):
writer = self.getWriter(open_mode=IndexWriterConfig.OpenMode.APPEND)
self._insertDoc(writer, content)
writer.close()
def testExclusive(self):
query = TermRangeQuery.newStringRange("content", "A", "C", False, False)
self._initializeIndex(["A", "B", "C", "D"])
searcher = self.getSearcher()
topDocs = searcher.search(query, 50)
self.assertEqual(1, topDocs.totalHits,
"A,B,C,D, only B in range")
del searcher
self._initializeIndex(["A", "B", "D"])
searcher = self.getSearcher()
topDocs = searcher.search(query, 50)
self.assertEqual(1, topDocs.totalHits,
"A,B,D, only B in range")
del searcher
self._addDoc("C")
searcher = self.getSearcher()
topDocs = searcher.search(query, 50)
self.assertEqual(1, topDocs.totalHits,
"C added, still only B in range")
del searcher
def testInclusive(self):
query = TermRangeQuery.newStringRange("content", "A", "C", True, True)
self._initializeIndex(["A", "B", "C", "D"])
searcher = self.getSearcher()
topDocs = searcher.search(query, 50)
self.assertEqual(3, topDocs.totalHits,
"A,B,C,D - A,B,C in range")
del searcher
self._initializeIndex(["A", "B", "D"])
searcher = self.getSearcher()
topDocs = searcher.search(query, 50)
self.assertEqual(2, topDocs.totalHits,
"A,B,D - A and B in range")
del searcher
self._addDoc("C")
searcher = self.getSearcher()
topDocs = searcher.search(query, 50)
self.assertEqual(3, topDocs.totalHits,
"C added - A, B, C in range")
del searcher
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
pylucene-4.10.1-1/test/test_ThaiAnalyzer.py 000644 000765 000000 00000015265 12162654000 020772 0 ustar 00vajda wheel 000000 000000 # -*- coding: utf-8 -*-
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
import sys, lucene, unittest
from BaseTokenStreamTestCase import BaseTokenStreamTestCase
from java.io import StringReader
from org.apache.lucene.analysis.th import ThaiAnalyzer, ThaiWordFilter
from org.apache.lucene.analysis.util import CharArraySet
from org.apache.lucene.util import Version
class ThaiAnalyzerTestCase(BaseTokenStreamTestCase):
def testOffsets(self):
self.assert_(ThaiWordFilter.DBBI_AVAILABLE,
"JRE does not support Thai dictionary-based BreakIterator")
self._assertAnalyzesTo(ThaiAnalyzer(Version.LUCENE_CURRENT, CharArraySet.EMPTY_SET),
u"การที่ได้ต้องแสดงว่างานดี",
[ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
u"ว่า", u"งาน", u"ดี" ],
[ 0, 3, 6, 9, 13, 17, 20, 23 ],
[ 3, 6, 9, 13, 17, 20, 23, 25 ])
def testTokenType(self):
self.assert_(ThaiWordFilter.DBBI_AVAILABLE,
"JRE does not support Thai dictionary-based BreakIterator")
self._assertAnalyzesTo(ThaiAnalyzer(Version.LUCENE_35),
u"การที่ได้ต้องแสดงว่างานดี ๑๒๓",
[ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
u"ว่า", u"งาน", u"ดี", u"๑๒๓" ],
None, None,
[ "", "",
"", "",
"", "",
"", "",
"" ])
def testPositionIncrements(self):
self.assert_(ThaiWordFilter.DBBI_AVAILABLE,
"JRE does not support Thai dictionary-based BreakIterator")
analyzer = ThaiAnalyzer(Version.LUCENE_35)
self._assertAnalyzesTo(analyzer, u"การที่ได้ต้อง the แสดงว่างานดี",
[ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
u"ว่า", u"งาน", u"ดี" ],
[ 0, 3, 6, 9, 18, 22, 25, 28 ],
[ 3, 6, 9, 13, 22, 25, 28, 30 ],
None,
[ 1, 1, 1, 1, 2, 1, 1, 1 ])
# case that a stopword is adjacent to thai text, with no whitespace
self._assertAnalyzesTo(analyzer, u"การที่ได้ต้องthe แสดงว่างานดี",
[ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
u"ว่า", u"งาน", u"ดี" ],
[ 0, 3, 6, 9, 17, 21, 24, 27 ],
[ 3, 6, 9, 13, 21, 24, 27, 29 ],
None,
[ 1, 1, 1, 1, 2, 1, 1, 1 ])
    def testPositionIncrements30(self):
analyzer = ThaiAnalyzer(Version.LUCENE_30)
self._assertAnalyzesTo(analyzer, u"การที่ได้ต้อง the แสดงว่างานดี",
[ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
u"ว่า", u"งาน", u"ดี" ],
[ 0, 3, 6, 9, 18, 22, 25, 28 ],
[ 3, 6, 9, 13, 22, 25, 28, 30 ],
None,
[ 1, 1, 1, 1, 2, 1, 1, 1 ])
# case that a stopword is adjacent to thai text, with no whitespace
self._assertAnalyzesTo(analyzer, u"การที่ได้ต้องthe แสดงว่างานดี",
[ u"การ", u"ที่", u"ได้", u"ต้อง", u"แสดง",
u"ว่า", u"งาน", u"ดี" ],
[ 0, 3, 6, 9, 17, 21, 24, 27 ],
[ 3, 6, 9, 13, 21, 24, 27, 29 ],
None,
[ 1, 1, 1, 1, 2, 1, 1, 1 ])
def testAnalyzer30(self):
analyzer = ThaiAnalyzer(Version.LUCENE_30)
self._assertAnalyzesTo(analyzer, u"", [])
self._assertAnalyzesTo(analyzer,
u"การที่ได้ต้องแสดงว่างานดี",
[ u"การ", u"ที่", u"ได้", u"ต้อง",
u"แสดง", u"ว่า", u"งาน", u"ดี" ])
self._assertAnalyzesTo(analyzer,
u"บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
[ u"บริษัท", u"ชื่อ", u"xy&z", u"คุย", u"กับ", u"xyz@demo.com" ])
# English stop words
self._assertAnalyzesTo(analyzer,
u"ประโยคว่า The quick brown fox jumped over the lazy dogs",
[ u"ประโยค", u"ว่า", u"quick", u"brown", u"fox",
u"jumped", u"over", u"lazy", u"dogs" ])
if __name__ == "__main__":
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
if ThaiWordFilter.DBBI_AVAILABLE:
if '-loop' in sys.argv:
sys.argv.remove('-loop')
while True:
try:
unittest.main()
except:
pass
else:
unittest.main()
else:
print >>sys.stderr, "Thai not supported by this JVM, tests skipped"
pylucene-4.10.1-1/samples/FacetExample.py 000644 000765 000000 00000030052 12356514472 020367 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Author: Thomas Koch
#
# FacetExample.py - a simple Facet example for PyLucene
# (originally based on the Java counterpart from
# package org.apache.lucene.facet.example.simple
# later updated to new Facet API)
# ====================================================================
usage = """
usage: python FacetExample.py [index | simple | drilldown]
where
'index' => create index for faceted search
'simple' => run simple faceted search
'drilldown' => run faceted search with drilldown
"""
INDEX_DIR = "FacetExample.Index"
TAXONOMY_DIR = "FacetExample.Taxonomy"
import os, sys, lucene
from java.io import File
from java.lang import System
from java.text import DecimalFormat
from java.util import Arrays
from org.apache.lucene.util import Version
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.search import IndexSearcher, TermQuery, MatchAllDocsQuery
from org.apache.lucene.store import FSDirectory, SimpleFSDirectory
from org.apache.lucene.index import (IndexWriter, IndexReader,
DirectoryReader, Term,
IndexWriterConfig)
from org.apache.lucene.document import Document, Field, TextField
from org.apache.lucene.facet import DrillSideways, DrillDownQuery
from org.apache.lucene.facet import (Facets, FacetField, FacetResult,
FacetsConfig, FacetsCollector)
from org.apache.lucene.facet.taxonomy import FastTaxonomyFacetCounts
from org.apache.lucene.facet.taxonomy.directory import (DirectoryTaxonomyWriter,
DirectoryTaxonomyReader)
# -----------------------------------------------------------------------------
# SimpleUtils:
# Documents title field
TITLE = "title"
TEXT = "text"
docTexts = [
"The white car is the one I want.", # doc nr.0
"The white dog does not belong to anyone." # doc nr.1
]
# sample documents titles (for the title field).
docTitles = [
"white car", # doc nr.0
"white dog", # doc nr.1
]
# Authors: author[n] == Author of n-th document
# example for simple, single-value facet
authors = [
"Bob", # doc nr.0
"Lisa" # doc nr.1
]
# Categories: categories[D][N] == category-path no. N for document no. D.
# example for hierarchical multi-value facet
categories = [
[["root","a","f1"], ["root","a","f2"]], # doc nr.0
[["root","a","f1"], ["root","a","f3"]] # doc nr.1
]
# samples for (drilldown) search
searchValues = ['white', 'car']
drilldownCategories = [["root","a","f1"], ["root","a","f2"]]
# -----------------------------------------------------------------------------
# Sample indexer creates an index, and adds to it sample documents and facets.
class SimpleIndexer(object):
    def index(cls, indexDir, taxoDir, facets_config):
        """Create an index and add sample documents and facets to it.
indexDir Directory in which the index should be created.
taxoDir Directory in which the taxonomy index should be created.
"""
# create and open an index writer
config = IndexWriterConfig(Version.LUCENE_48,
WhitespaceAnalyzer(Version.LUCENE_48))
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
iw = IndexWriter(indexDir, config)
# create and open a taxonomy writer
taxo = DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE)
# loop over sample documents
nDocsAdded = 0
for docNum in range(len(docTexts)):
# create a plain Lucene document and add some regular Lucene fields to it
doc = Document()
doc.add(TextField(TITLE, docTitles[docNum], Field.Store.YES))
doc.add(TextField(TEXT, docTexts[docNum], Field.Store.NO))
# obtain the sample facets for current document
facets = categories[docNum]
author = authors[docNum]
# ... and use the FacetField class for adding facet fields to
# the Lucene document (and via FacetsConfig to the taxonomy index)
doc.add(FacetField("Author", author))
for f in facets:
doc.add(FacetField("Categories", f))
# finally add the document to the index
iw.addDocument(facets_config.build(taxo, doc))
nDocsAdded += 1
# close the taxonomy index and the index - all modifications are
# now safely in the provided directories: indexDir and taxoDir.
iw.close()
taxo.close()
print "Indexed %d documents with facets." % nDocsAdded
index = classmethod(index)
# -----------------------------------------------------------------------------
# SimpleSearcher searches the index with facets.
class SimpleSearcher(object):
def searchWithFacets(cls, indexReader, taxoReader, facets_config):
"""
Search an index with facets.
return a list of FacetResult instances
"""
# MatchAllDocsQuery is for "browsing" (counts facets for all non-deleted docs in the index)
query = MatchAllDocsQuery()
return cls.searchWithQuery(query, indexReader, taxoReader, facets_config)
def searchWithTerm(cls, query, indexReader, taxoReader, facets_config):
"""
Search an index with facets by using simple term query
return a list of FacetResult instances
"""
query = TermQuery(Term(TEXT, query))
return cls.searchWithQuery(query, indexReader, taxoReader, facets_config)
def searchWithQuery(cls, query, indexReader, taxoReader, facets_config):
"""
Search an index with facets for a given query
return a list of FacetResult instances
"""
# prepare searcher to search against
searcher = IndexSearcher(indexReader)
# create a FacetsCollector to use in our facetted search:
facets_collector = FacetsCollector()
FacetsCollector.search(searcher, query, 10, facets_collector)
# Count both "Categories" and "Author" dimensions
facets = FastTaxonomyFacetCounts(taxoReader, facets_config, facets_collector)
results = []
facet_result = facets.getTopChildren(10, "Categories")
if facet_result:
results.append(facet_result)
print "Categories: ", facet_result.childCount
for lv in facet_result.labelValues:
print " '%s' (%s)" % (lv.label, lv.value)
facet_result = facets.getTopChildren(10, "Categories", "root", "a")
if facet_result:
results.append(facet_result)
print "Root-a-Categories: ", facet_result.childCount
for lv in facet_result.labelValues:
print " '%s' (%s)" % (lv.label, lv.value)
facet_result = facets.getTopChildren(10, "Author")
if facet_result:
results.append(facet_result)
print "Author: ", facet_result.childCount
for lv in facet_result.labelValues:
print " '%s' (%s)" % (lv.label, lv.value)
return results
def searchWithDrillDown(cls, drilldownCategory, indexReader, taxoReader, facets_config):
"""
Search an index with facets drill-down.
return a list of FacetResult instances
"""
        # User drills down on a 'Categories' path and we return facets for 'Author'
searcher = IndexSearcher(indexReader)
# Passing no baseQuery means we drill down on all documents ("browse only"):
query = DrillDownQuery(facets_config)
        # Now user drills down on the given 'Categories' path:
query.add("Categories", drilldownCategory)
facets_collector = FacetsCollector()
FacetsCollector.search(searcher, query, 10, facets_collector)
# Retrieve results
facets = FastTaxonomyFacetCounts(taxoReader, facets_config, facets_collector)
facet_result = facets.getTopChildren(10, "Author")
print "Author: ", facet_result.childCount
for lv in facet_result.labelValues:
print " '%s' (%s)" % (lv.label, lv.value)
return facet_result
searchWithFacets = classmethod(searchWithFacets)
searchWithTerm = classmethod(searchWithTerm)
searchWithQuery = classmethod(searchWithQuery)
searchWithDrillDown = classmethod(searchWithDrillDown)
# -----------------------------------------------------------------------------
class FacetExample(object):
def __init__(self, directory):
self.directory = directory
        # create Directories for the search index and for the taxonomy index,
        # in RAM or on disk
#indexDir = RAMDirectory()
#taxoDir = RAMDirectory()
self.indexDir = FSDirectory.open(File(os.path.join(self.directory,
INDEX_DIR)))
self.taxoDir = FSDirectory.open(File(os.path.join(self.directory,
TAXONOMY_DIR)))
# FacetConfig
self.facets_config = FacetsConfig()
self.facets_config.setHierarchical("Categories", True)
self.facets_config.setMultiValued("Categories", True)
def createIndex(self):
# index the sample documents
SimpleIndexer.index(self.indexDir, self.taxoDir, self.facets_config)
def runSimple(self):
# open readers
taxo = DirectoryTaxonomyReader(self.taxoDir)
indexReader = DirectoryReader.open(self.indexDir)
for term in searchValues:
print "\nsearch by term '%s' ..." % term
facetRes = SimpleSearcher.searchWithTerm(term, indexReader, taxo,
self.facets_config)
print "\nsearch all documents ..."
facetRes = SimpleSearcher.searchWithFacets(indexReader, taxo,
self.facets_config)
# close readers
taxo.close()
indexReader.close()
# return result
return facetRes
def runDrillDown(self):
# open readers
taxo = DirectoryTaxonomyReader(self.taxoDir)
indexReader = DirectoryReader.open(self.indexDir)
for drilldown in drilldownCategories:
print "search with drilldown: %s" % '/'.join(drilldown)
facetRes = SimpleSearcher.searchWithDrillDown(drilldown, indexReader,
taxo, self.facets_config)
# close readers
taxo.close()
indexReader.close()
# return result
return facetRes
def main(cls, argv):
baseDir = os.path.dirname(os.path.abspath(argv[0]))
if len(argv) > 1:
index = simple = drilldown = False
for arg in argv[1:]:
if arg == "index":
index = True
elif arg == "simple":
simple = True
elif arg == "drilldown":
drilldown = True
else:
sys.exit(usage+"\nunknown argument: %s" % arg)
else:
index = simple = True
drilldown = False
example = FacetExample(baseDir)
if index:
example.createIndex()
if simple:
example.runSimple()
if drilldown:
example.runDrillDown()
main = classmethod(main)
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
FacetExample.main(sys.argv)
pylucene-4.10.1-1/samples/IndexFiles.py 000644 000765 000000 00000007257 12203673435 020072 0 ustar 00vajda wheel 000000 000000 #!/usr/bin/env python
INDEX_DIR = "IndexFiles.index"
import sys, os, lucene, threading, time
from datetime import datetime
from java.io import File
from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, FieldType
from org.apache.lucene.index import FieldInfo, IndexWriter, IndexWriterConfig
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.util import Version
"""
This class is loosely based on the Lucene (java implementation) demo class
org.apache.lucene.demo.IndexFiles. It will take a directory as an argument
and will recursively index all of the files in that directory and below.
It will index on the file path, the file name and the file contents. The
resulting Lucene index will be placed in the current directory and called
'index'.
"""
class Ticker(object):
def __init__(self):
self.tick = True
def run(self):
while self.tick:
sys.stdout.write('.')
sys.stdout.flush()
time.sleep(1.0)
class IndexFiles(object):
"""Usage: python IndexFiles """
def __init__(self, root, storeDir, analyzer):
if not os.path.exists(storeDir):
os.mkdir(storeDir)
store = SimpleFSDirectory(File(storeDir))
analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
writer = IndexWriter(store, config)
self.indexDocs(root, writer)
ticker = Ticker()
print 'commit index',
threading.Thread(target=ticker.run).start()
writer.commit()
writer.close()
ticker.tick = False
print 'done'
def indexDocs(self, root, writer):
t1 = FieldType()
t1.setIndexed(True)
t1.setStored(True)
t1.setTokenized(False)
t1.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)
t2 = FieldType()
t2.setIndexed(True)
t2.setStored(False)
t2.setTokenized(True)
t2.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
for root, dirnames, filenames in os.walk(root):
for filename in filenames:
if not filename.endswith('.txt'):
continue
print "adding", filename
try:
path = os.path.join(root, filename)
file = open(path)
contents = unicode(file.read(), 'iso-8859-1')
file.close()
doc = Document()
doc.add(Field("name", filename, t1))
doc.add(Field("path", root, t1))
if len(contents) > 0:
doc.add(Field("contents", contents, t2))
else:
print "warning: no content in %s" % filename
writer.addDocument(doc)
except Exception, e:
print "Failed in indexDocs:", e
if __name__ == '__main__':
if len(sys.argv) < 2:
print IndexFiles.__doc__
sys.exit(1)
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
print 'lucene', lucene.VERSION
start = datetime.now()
try:
base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
IndexFiles(sys.argv[1], os.path.join(base_dir, INDEX_DIR),
StandardAnalyzer(Version.LUCENE_CURRENT))
end = datetime.now()
print end - start
except Exception, e:
print "Failed: ", e
raise e
pylucene-4.10.1-1/samples/manindex.py 000644 000765 000000 00000007770 12203673435 017643 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Author: Erik Hatcher
#
# to index all man pages on $MANPATH or /usr/share/man:
# python manindex.py pages
# ====================================================================
import os, re, sys, lucene
from subprocess import *
from java.io import File
from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import IndexWriter, IndexWriterConfig
from org.apache.lucene.document import Document, Field, StringField, TextField
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.util import Version
def indexDirectory(dir):
for name in os.listdir(dir):
path = os.path.join(dir, name)
if os.path.isfile(path):
indexFile(dir, name)
def indexFile(dir, filename):
path = os.path.join(dir, filename)
print " File: ", filename
if filename.endswith('.gz'):
child = Popen('gunzip -c ' + path + ' | groff -t -e -E -mandoc -Tascii | col -bx', shell=True, stdout=PIPE, cwd=os.path.dirname(dir)).stdout
command, section = re.search('^(.*)\.(.*)\.gz$', filename).groups()
else:
child = Popen('groff -t -e -E -mandoc -Tascii ' + path + ' | col -bx',
shell=True, stdout=PIPE, cwd=os.path.dirname(dir)).stdout
command, section = re.search('^(.*)\.(.*)$', filename).groups()
data = child.read()
err = child.close()
if err:
raise RuntimeError, '%s failed with exit code %d' %(command, err)
matches = re.search('^NAME$(.*?)^\S', data,
re.MULTILINE | re.DOTALL)
name = matches and matches.group(1) or ''
matches = re.search('^(?:SYNOPSIS|SYNOPSYS)$(.*?)^\S', data,
re.MULTILINE | re.DOTALL)
synopsis = matches and matches.group(1) or ''
    matches = re.search('^(?:DESCRIPTION|OVERVIEW)$(.*?)^\S', data,
                        re.MULTILINE | re.DOTALL)
description = matches and matches.group(1) or ''
doc = Document()
doc.add(Field("command", command, StringField.TYPE_STORED))
doc.add(Field("section", section, StringField.TYPE_STORED))
doc.add(Field("name", name.strip(), TextField.TYPE_STORED))
doc.add(Field("synopsis", synopsis.strip(), TextField.TYPE_STORED))
doc.add(Field("keywords", ' '.join((command, name, synopsis, description)),
TextField.TYPE_NOT_STORED))
doc.add(Field("filename", os.path.abspath(path), StringField.TYPE_STORED))
writer.addDocument(doc)
if __name__ == '__main__':
if len(sys.argv) != 2:
print "Usage: python manindex.py "
else:
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
directory = SimpleFSDirectory(File(sys.argv[1]))
analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
analyzer = LimitTokenCountAnalyzer(analyzer, 10000)
config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
writer = IndexWriter(directory, config)
manpath = os.environ.get('MANPATH', '/usr/share/man').split(os.pathsep)
for dir in manpath:
print "Crawling", dir
for name in os.listdir(dir):
path = os.path.join(dir, name)
if os.path.isdir(path):
indexDirectory(path)
writer.commit()
writer.close()
pylucene-4.10.1-1/samples/mansearch.py 000644 000765 000000 00000005550 12203673435 017773 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Author: Erik Hatcher
#
# to query the index generated with manindex.py
# python mansearch.py <query terms>
# by default, the index is stored in 'pages', which can be overridden with
# the MANDEX environment variable
# ====================================================================
import sys, os, lucene
from string import Template
from datetime import datetime
from getopt import getopt, GetoptError
from java.io import File
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.util import Version
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
def usage():
print sys.argv[0], "[--format=] [--index=] [--stats] "
print "default index is found from MANDEX environment variable"
try:
options, args = getopt(sys.argv[1:], '', ['format=', 'index=', 'stats'])
except GetoptError:
usage()
sys.exit(2)
format = "#name"
indexDir = os.environ.get('MANDEX') or 'pages'
stats = False
for o, a in options:
if o == "--format":
format = a
elif o == "--index":
indexDir = a
elif o == "--stats":
stats = True
class CustomTemplate(Template):
delimiter = '#'
template = CustomTemplate(format)
fsDir = SimpleFSDirectory(File(indexDir))
searcher = IndexSearcher(DirectoryReader.open(fsDir))
analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
parser = QueryParser(Version.LUCENE_CURRENT, "keywords", analyzer)
parser.setDefaultOperator(QueryParser.Operator.AND)
query = parser.parse(' '.join(args))
start = datetime.now()
scoreDocs = searcher.search(query, 50).scoreDocs
duration = datetime.now() - start
if stats:
print >>sys.stderr, "Found %d document(s) (in %s) that matched query '%s':" %(len(scoreDocs), duration, query)
for scoreDoc in scoreDocs:
doc = searcher.doc(scoreDoc.doc)
table = dict((field.name(), field.stringValue())
for field in doc.getFields())
print template.substitute(table)
pylucene-4.10.1-1/samples/PorterStemmerAnalyzer.py 000644 000765 000000 00000005414 12203673435 022347 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
# This sample illustrates how to write an Analyzer 'extension' in Python.
#
# What is happening behind the scenes ?
#
# The PorterStemmerAnalyzer python class does not in fact extend Analyzer,
# it merely provides an implementation for Analyzer's abstract tokenStream()
# method. When an instance of PorterStemmerAnalyzer is passed to PyLucene,
# with a call to IndexWriter(store, PorterStemmerAnalyzer(), True) for
# example, the PyLucene SWIG-based glue code wraps it into an instance of
# PythonAnalyzer, a proper java extension of Analyzer which implements a
# native tokenStream() method whose job is to call the tokenStream() method
# on the python instance it wraps. The PythonAnalyzer instance is the
# Analyzer extension bridge to PorterStemmerAnalyzer.
import sys, os, lucene
from datetime import datetime
from IndexFiles import IndexFiles
from org.apache.lucene.analysis.core import \
LowerCaseFilter, StopFilter, StopAnalyzer
from org.apache.lucene.analysis.en import PorterStemFilter
from org.apache.lucene.analysis.standard import \
StandardTokenizer, StandardFilter
from org.apache.lucene.util import Version
from org.apache.pylucene.analysis import PythonAnalyzer
class PorterStemmerAnalyzer(PythonAnalyzer):
def createComponents(self, fieldName, reader):
source = StandardTokenizer(Version.LUCENE_CURRENT, reader)
filter = StandardFilter(Version.LUCENE_CURRENT, source)
filter = LowerCaseFilter(Version.LUCENE_CURRENT, filter)
filter = PorterStemFilter(filter)
filter = StopFilter(Version.LUCENE_CURRENT, filter,
StopAnalyzer.ENGLISH_STOP_WORDS_SET)
return self.TokenStreamComponents(source, filter)
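# A minimal usage sketch of the analyzer defined above (illustrative only;
# the field name and text are made up):
#
#   lucene.initVM(vmargs=['-Djava.awt.headless=true'])
#   from java.io import StringReader
#   analyzer = PorterStemmerAnalyzer()
#   stream = analyzer.tokenStream("contents", StringReader("jumping jumps"))
#   stream.reset()
#   while stream.incrementToken():
#       pass  # both tokens come out stemmed to 'jump'
#   stream.close()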
if __name__ == '__main__':
if len(sys.argv) < 2:
print IndexFiles.__doc__
sys.exit(1)
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
print 'lucene', lucene.VERSION
start = datetime.now()
try:
IndexFiles(sys.argv[1], "index", PorterStemmerAnalyzer())
end = datetime.now()
print end - start
except Exception, e:
print "Failed: ", e
pylucene-4.10.1-1/samples/SearchFiles.py 000644 000765 000000 00000003665 12203673435 020227 0 ustar 00vajda wheel 000000 000000 #!/usr/bin/env python
INDEX_DIR = "IndexFiles.index"
import sys, os, lucene
from java.io import File
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.index import DirectoryReader
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.search import IndexSearcher
from org.apache.lucene.util import Version
"""
This script is loosely based on the Lucene (java implementation) demo class
org.apache.lucene.demo.SearchFiles. It will prompt for a search query, then it
will search the Lucene index in the current directory called 'index' for the
search query entered against the 'contents' field. It will then display the
'path' and 'name' fields for each of the hits it finds in the index. Note that
the searcher is simply deleted at the end rather than explicitly closed,
because an explicit close caused a stack overflow in some cases.
"""
def run(searcher, analyzer):
while True:
print
print "Hit enter with no input to quit."
command = raw_input("Query:")
if command == '':
return
print
print "Searching for:", command
query = QueryParser(Version.LUCENE_CURRENT, "contents",
analyzer).parse(command)
scoreDocs = searcher.search(query, 50).scoreDocs
print "%s total matching documents." % len(scoreDocs)
for scoreDoc in scoreDocs:
doc = searcher.doc(scoreDoc.doc)
print 'path:', doc.get("path"), 'name:', doc.get("name")
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
print 'lucene', lucene.VERSION
base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
directory = SimpleFSDirectory(File(os.path.join(base_dir, INDEX_DIR)))
searcher = IndexSearcher(DirectoryReader.open(directory))
analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
run(searcher, analyzer)
del searcher
pylucene-4.10.1-1/samples/TermPositionVector.py 000644 000765 000000 00000003321 12203673435 021643 0 ustar 00vajda wheel 000000 000000
import lucene
from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.store import RAMDirectory
from org.apache.lucene.document import Document, Field, FieldType
from org.apache.lucene.util import BytesRef, BytesRefIterator, Version
from org.apache.lucene.index import \
IndexWriterConfig, IndexWriter, DirectoryReader
if __name__ == '__main__':
lucene.initVM(vmargs=['-Djava.awt.headless=true'])
directory = RAMDirectory()
iconfig = IndexWriterConfig(Version.LUCENE_CURRENT, LimitTokenCountAnalyzer(StandardAnalyzer(Version.LUCENE_CURRENT), 100))
iwriter = IndexWriter(directory, iconfig)
ft = FieldType()
ft.setIndexed(True)
ft.setStored(True)
ft.setTokenized(True)
ft.setStoreTermVectors(True)
ft.setStoreTermVectorOffsets(True)
ft.setStoreTermVectorPositions(True)
ts = ["this bernhard is the text to be index text",
"this claudia is the text to be indexed"]
for t in ts:
doc = Document()
doc.add(Field("fieldname", t, ft))
iwriter.addDocument(doc)
iwriter.commit()
iwriter.close()
ireader = DirectoryReader.open(directory)
for doc in xrange(0, len(ts)):
tv = ireader.getTermVector(doc, "fieldname")
termsEnum = tv.iterator(None)
for term in BytesRefIterator.cast_(termsEnum):
dpEnum = termsEnum.docsAndPositions(None, None)
dpEnum.nextDoc() # prime the enum which works only for the current doc
freq = dpEnum.freq()
print 'term:', term.utf8ToString()
print ' freq:', freq
for i in xrange(freq):
print " pos:", dpEnum.nextPosition()
print " off: %i-%i" %(dpEnum.startOffset(), dpEnum.endOffset())
print
pylucene-4.10.1-1/samples/ThreadIndexFiles.py 000644 000765 000000 00000003057 12203673435 021214 0 ustar 00vajda wheel 000000 000000 # ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
# This sample illustrates how to use a thread with PyLucene
INDEX_DIR = "ThreadIndexFiles.index"
import sys, os, threading, lucene
from datetime import datetime
from IndexFiles import IndexFiles
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.util import Version
if __name__ == '__main__':
if len(sys.argv) < 2:
print IndexFiles.__doc__
sys.exit(1)
env=lucene.initVM(vmargs=['-Djava.awt.headless=true'])
print 'lucene', lucene.VERSION
def fn():
base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
env.attachCurrentThread()
start = datetime.now()
IndexFiles(sys.argv[1], os.path.join(base_dir, INDEX_DIR),
StandardAnalyzer(Version.LUCENE_CURRENT))
end = datetime.now()
print end - start
threading.Thread(target=fn).start()
pylucene-4.10.1-1/python/collections.py 000644 000765 000000 00000024055 12050060702 020211 0 ustar 00vajda wheel 000000 000000 # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from lucene import JArray, JavaError
from java.lang import IllegalStateException, IndexOutOfBoundsException
from java.util import NoSuchElementException
from org.apache.pylucene.util import \
PythonSet, PythonList, PythonIterator, PythonListIterator
class JavaSet(PythonSet):
"""
This class implements java.util.Set around a Python set instance it wraps.
"""
def __init__(self, _set):
super(JavaSet, self).__init__()
self._set = _set
def __contains__(self, obj):
return obj in self._set
def __len__(self):
return len(self._set)
def __iter__(self):
return iter(self._set)
def add(self, obj):
if obj not in self._set:
self._set.add(obj)
return True
return False
def addAll(self, collection):
size = len(self._set)
self._set.update(collection)
return len(self._set) > size
def clear(self):
self._set.clear()
def contains(self, obj):
return obj in self._set
def containsAll(self, collection):
for obj in collection:
if obj not in self._set:
return False
return True
def equals(self, collection):
if type(self) is type(collection):
return self._set == collection._set
return False
def isEmpty(self):
return len(self._set) == 0
def iterator(self):
class _iterator(PythonIterator):
def __init__(_self):
super(_iterator, _self).__init__()
_self._iterator = iter(self._set)
def hasNext(_self):
if hasattr(_self, '_next'):
return True
try:
_self._next = _self._iterator.next()
return True
except StopIteration:
return False
def next(_self):
if hasattr(_self, '_next'):
next = _self._next
del _self._next
else:
next = _self._iterator.next()
return next
return _iterator()
def remove(self, obj):
try:
self._set.remove(obj)
return True
except KeyError:
return False
def removeAll(self, collection):
result = False
for obj in collection:
try:
self._set.remove(obj)
result = True
except KeyError:
pass
return result
def retainAll(self, collection):
result = False
for obj in list(self._set):
if obj not in collection:
self._set.remove(obj)
result = True
return result
def size(self):
return len(self._set)
def toArray(self): # JavaSet
return list(self._set)
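# A quick illustrative sketch (assumes lucene.initVM() has been called,
# since PythonSet is a Java extension class):
#
#   s = JavaSet(set(['a', 'b']))
#   s.add('c')           # True, the element was new
#   s.contains('a')      # True
#   s.size()             # 3
#   s.remove('missing')  # False, nothing removed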
class JavaListIterator(PythonListIterator):
"""
This class implements java.util.ListIterator for a Python list instance it
wraps. (simple bidirectional iterator)
"""
def __init__(self, _lst, index=0):
super(JavaListIterator, self).__init__()
self._lst = _lst
self._lastIndex = -1 # keep state for remove/set
self.index = index
def next(self):
if self.index >= len(self._lst):
raise JavaError, NoSuchElementException(str(self.index))
result = self._lst[self.index]
self._lastIndex = self.index
self.index += 1
return result
def previous(self):
if self.index <= 0:
raise JavaError, NoSuchElementException(str(self.index - 1))
self.index -= 1
self._lastIndex = self.index
return self._lst[self.index]
def hasPrevious(self):
return self.index > 0
def hasNext(self):
return self.index < len(self._lst)
def nextIndex(self):
return min(self.index, len(self._lst))
def previousIndex(self):
return max(-1, self.index - 1)
    def add(self, element):
        """
        Inserts the specified element into the list.
        The element is inserted immediately before the next element
        that would be returned by next, if any, and after the next
        element that would be returned by previous, if any.
        Unlike remove/set, add is valid regardless of cursor state.
        """
        self._lst.insert(self.index, element)
        self.index += 1
        self._lastIndex = -1  # invalidate state for remove/set
def remove(self):
"""
Removes from the list the last element that
was returned by next or previous.
"""
if self._lastIndex < 0:
raise JavaError, IllegalStateException("remove")
del self._lst[self._lastIndex]
self._lastIndex = -1 # invalidate state
def set(self, element):
"""
Replaces the last element returned by next or previous
with the specified element.
"""
if self._lastIndex < 0:
raise JavaError, IllegalStateException("set")
self._lst[self._lastIndex] = element
def __iter__(self):
return self
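# A quick illustrative sketch of the bidirectional protocol implemented
# above (assumes lucene.initVM() has been called):
#
#   it = JavaListIterator(['a', 'b', 'c'])
#   it.next()        # 'a'
#   it.next()        # 'b'
#   it.previous()    # 'b' - the cursor moved back
#   it.set('B')      # replaces 'b', the element last returned
#   it.nextIndex()   # 1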
class JavaList(PythonList):
"""
This class implements java.util.List around a Python list instance it wraps.
"""
def __init__(self, _lst):
super(JavaList, self).__init__()
self._lst = _lst
def __contains__(self, obj):
return obj in self._lst
def __len__(self):
return len(self._lst)
def __iter__(self):
return iter(self._lst)
def add(self, index, obj):
self._lst.insert(index, obj)
def addAll(self, collection):
size = len(self._lst)
self._lst.extend(collection)
return len(self._lst) > size
def addAll(self, index, collection):
size = len(self._lst)
self._lst[index:index] = collection
return len(self._lst) > size
def clear(self):
del self._lst[:]
def contains(self, obj):
return obj in self._lst
def containsAll(self, collection):
for obj in collection:
if obj not in self._lst:
return False
return True
def equals(self, collection):
if type(self) is type(collection):
return self._lst == collection._lst
return False
def get(self, index):
if index < 0 or index >= self.size():
raise JavaError, IndexOutOfBoundsException(str(index))
return self._lst[index]
def indexOf(self, obj):
try:
return self._lst.index(obj)
except ValueError:
return -1
def isEmpty(self):
return len(self._lst) == 0
def iterator(self):
class _iterator(PythonIterator):
def __init__(_self):
super(_iterator, _self).__init__()
_self._iterator = iter(self._lst)
def hasNext(_self):
if hasattr(_self, '_next'):
return True
try:
_self._next = _self._iterator.next()
return True
except StopIteration:
return False
def next(_self):
if hasattr(_self, '_next'):
next = _self._next
del _self._next
else:
next = _self._iterator.next()
return next
return _iterator()
    def lastIndexOf(self, obj):
        i = len(self._lst) - 1
        while i >= 0:
            if obj.equals(self._lst[i]):
                break
            i -= 1
        return i
def listIterator(self, index=0):
return JavaListIterator(self._lst, index)
    def remove(self, obj_or_index):
        if type(obj_or_index) is type(1):
            return self.removeAt(obj_or_index)
        return self.removeObject(obj_or_index)
def removeAt(self, pos):
"""
Removes the element at the specified position in this list.
Note: private method called from Java via remove(int index)
index is already checked (or IndexOutOfBoundsException thrown)
"""
try:
el = self._lst[pos]
del self._lst[pos]
return el
except IndexError:
# should not happen
return None
def removeObject(self, obj):
"""
Removes the first occurrence of the specified object
from this list, if it is present
"""
try:
self._lst.remove(obj)
return True
except ValueError:
return False
    def removeAll(self, collection):
        result = False
        for obj in collection:
            if self.removeObject(obj):
                result = True
        return result
    def retainAll(self, collection):
        result = False
        for obj in list(self._lst):
            if obj not in collection and self.removeObject(obj):
                result = True
        return result
def size(self):
return len(self._lst)
def toArray(self):
return self._lst
def subListChecked(self, fromIndex, toIndex):
"""
Note: private method called from Java via subList()
from/to index are already checked (or IndexOutOfBoundsException thrown)
        also IllegalArgumentException is thrown if the endpoint indices
        are out of order (fromIndex > toIndex)
"""
sublst = self._lst[fromIndex:toIndex]
return JavaList(sublst)
def set(self, index, obj):
if index < 0 or index >= self.size():
raise JavaError, IndexOutOfBoundsException(str(index))
self._lst[index] = obj
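# A quick illustrative sketch (assumes lucene.initVM() has been called):
# JavaList lets a plain Python list travel through Java APIs expecting
# a java.util.List:
#
#   lst = JavaList(['a', 'b', 'c'])
#   lst.get(1)             # 'b'
#   lst.indexOf('c')       # 2
#   lst.removeObject('a')  # True
#   lst.toArray()          # ['b', 'c']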
pylucene-4.10.1-1/python/ICUFoldingFilter.py 000644 000765 000000 00000005067 11364157465 021012 0 ustar 00vajda wheel 000000 000000 # -*- coding: utf-8 -*-
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Port of java/org/apache/lucene/analysis/icu/ICUFoldingFilter.java
# using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org)
#
# A TokenFilter that applies search term folding to Unicode text,
# applying foldings from UTR#30 Character Foldings.
#
# This filter applies the following foldings from the report to unicode text:
#
# Accent removal
# Case folding
# Canonical duplicates folding
# Dashes folding
# Diacritic removal (including stroke, hook, descender)
# Greek letterforms folding
# Han Radical folding
# Hebrew Alternates folding
# Jamo folding
# Letterforms folding
# Math symbol folding
# Multigraph Expansions: All
# Native digit folding
# No-break folding
# Overline folding
# Positional forms folding
# Small forms folding
# Space folding
# Spacing Accents folding
# Subscript folding
# Superscript folding
# Suzhou Numeral folding
# Symbol folding
# Underline folding
# Vertical forms folding
# Width folding
#
# Additionally, Default Ignorables are removed, and text is normalized to NFKC.
# All foldings, case folding, and normalization mappings are applied
# recursively to ensure a fully folded and normalized result.
#
# ====================================================================
import os, lucene
from lucene.ICUNormalizer2Filter import ICUNormalizer2Filter
from icu import ResourceBundle, Normalizer2, UNormalizationMode2
utr30 = os.path.join(lucene.__dir__, 'resources',
'org', 'apache', 'lucene', 'analysis', 'icu',
'utr30.dat')
ResourceBundle.setAppData("utr30", utr30)
class ICUFoldingFilter(ICUNormalizer2Filter):
def __init__(self, input):
normalizer = Normalizer2.getInstance("utr30", "utr30",
UNormalizationMode2.COMPOSE)
super(ICUFoldingFilter, self).__init__(input, normalizer)
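# A minimal usage sketch (illustrative only; assumes PyICU is installed
# and a JVM is running - the tokenizer and text are made up):
#
#   from java.io import StringReader
#   from org.apache.lucene.analysis.standard import StandardTokenizer
#   from org.apache.lucene.util import Version
#   source = StandardTokenizer(Version.LUCENE_CURRENT, StringReader(u"Résumé"))
#   stream = ICUFoldingFilter(source)  # accents and case fold away: u"resume"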
pylucene-4.10.1-1/python/ICUNormalizer2Filter.py 000644 000765 000000 00000005624 12016246051 021614 0 ustar 00vajda wheel 000000 000000 # -*- coding: utf-8 -*-
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Port of java/org/apache/lucene/analysis/icu/ICUNormalizer2Filter.java
# using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org)
#
# Normalize token text with ICU's {@link com.ibm.icu.text.Normalizer2}
#
# With this filter, you can normalize text in the following ways:
# - NFKC Normalization, Case Folding, and removing Ignorables (the default)
# - Using a standard Normalization mode (NFC, NFD, NFKC, NFKD)
# - Based on rules from a custom normalization mapping.
#
# If you use the defaults, this filter is a simple way to standardize
# Unicode text in a language-independent way for search:
# - The case folding that it does can be seen as a replacement for
# LowerCaseFilter: For example, it handles cases such as the Greek
# sigma, so that "Μάϊος" and "ΜΆΪΟΣ" will match correctly.
# - The normalization standardizes different forms of the same
# character in Unicode. For example, CJK full-width numbers will be
# standardized to their ASCII forms.
# - Ignorables such as Zero-Width Joiner and Variation Selectors are
# removed. These are typically modifier characters that affect display.
#
# ====================================================================
from icu import Normalizer2, UNormalizationMode2, UNormalizationCheckResult
from org.apache.lucene.analysis.tokenattributes import CharTermAttribute
from org.apache.pylucene.analysis import PythonTokenFilter
class ICUNormalizer2Filter(PythonTokenFilter):
def __init__(self, input, normalizer=None):
super(ICUNormalizer2Filter, self).__init__(input)
self.input = input
        self.termAtt = self.addAttribute(CharTermAttribute.class_)
if normalizer is None:
normalizer = Normalizer2.getInstance(None, "nfkc_cf", UNormalizationMode2.COMPOSE)
self.normalizer = normalizer
def incrementToken(self):
if self.input.incrementToken():
text = self.termAtt.toString()
if self.normalizer.quickCheck(text) != UNormalizationCheckResult.YES:
self.termAtt.setEmpty()
self.termAtt.append(self.normalizer.normalize(text))
return True
return False
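# A minimal usage sketch (illustrative only; assumes PyICU is installed
# and a JVM is running). With the default nfkc_cf normalizer, full-width
# and mixed-case variants collapse to a single form:
#
#   from java.io import StringReader
#   from org.apache.lucene.analysis.standard import StandardTokenizer
#   from org.apache.lucene.util import Version
#   source = StandardTokenizer(Version.LUCENE_CURRENT, StringReader(u"ＡＢＣ"))
#   stream = ICUNormalizer2Filter(source)  # the token is normalized to u"abc"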
pylucene-4.10.1-1/python/ICUTransformFilter.py 000644 000765 000000 00000006424 12016246051 021362 0 ustar 00vajda wheel 000000 000000 # -*- coding: utf-8 -*-
# ====================================================================
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ====================================================================
#
# Port of java/org/apache/lucene/analysis/icu/ICUTransformFilter.java
# using IBM's C++ ICU wrapped by PyICU (http://pyicu.osafoundation.org)
#
# A TokenFilter that transforms text with ICU.
#
# ICU provides text-transformation functionality via its Transliteration API.
# Although script conversion is its most common use, a Transliterator can
# actually perform a more general class of tasks. In fact, Transliterator
# defines a very general API which specifies only that a segment of the input
# text is replaced by new text. The particulars of this conversion are
# determined entirely by subclasses of Transliterator.
#
# Some useful transformations for search are built-in:
# - Conversion from Traditional to Simplified Chinese characters
# - Conversion from Hiragana to Katakana
# - Conversion from Fullwidth to Halfwidth forms.
# - Script conversions, for example Serbian Cyrillic to Latin
#
# Example usage: stream = new ICUTransformFilter(stream,
# Transliterator.getInstance("Traditional-Simplified"));
#
# For more details, see the ICU User Guide at:
# http://userguide.icu-project.org/transforms/general
#
# ====================================================================
from org.apache.pylucene.analysis import PythonTokenFilter
from org.apache.lucene.analysis.tokenattributes import CharTermAttribute
from icu import UTransPosition
class ICUTransformFilter(PythonTokenFilter):
# Create a new ICUTransformFilter that transforms text on the given
# stream.
#
# @param input {@link TokenStream} to filter.
# @param transform Transliterator to transform the text.
def __init__(self, input, transform):
super(ICUTransformFilter, self).__init__(input)
# Reusable position object
self.position = UTransPosition()
# term attribute, will be updated with transformed text.
self.termAtt = self.addAttribute(CharTermAttribute.class_)
self.input = input
self.transform = transform
def incrementToken(self):
if self.input.incrementToken():
text = self.termAtt.toString()
length = len(text)
self.position.start = 0
self.position.limit = length
self.position.contextStart = 0
self.position.contextLimit = length
text = self.transform.filteredTransliterate(text, self.position,
False)
self.termAtt.setEmpty()
self.termAtt.append(text)
return True
return False
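# The Java example above, sketched in Python (assumes PyICU supplies the
# Transliterator; 'stream' is any TokenStream you already have):
#
#   from icu import Transliterator
#   trans = Transliterator.createInstance("Traditional-Simplified")
#   stream = ICUTransformFilter(stream, trans)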
pylucene-4.10.1-1/java/org/ 000755 000765 000000 00000000000 12413103672 015511 5 ustar 00vajda wheel 000000 000000 pylucene-4.10.1-1/java/org/apache/ 000755 000765 000000 00000000000 12413103672 016732 5 ustar 00vajda wheel 000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/ 000755 000765 000000 00000000000 12413103672 020556 5 ustar 00vajda wheel 000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/analysis/ 000755 000765 000000 00000000000 12413103672 022401 5 ustar 00vajda wheel 000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/index/ 000755 000765 000000 00000000000 12413103672 021665 5 ustar 00vajda wheel 000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/queryparser/ 000755 000765 000000 00000000000 12413103672 023140 5 ustar 00vajda wheel 000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/search/ 000755 000765 000000 00000000000 12413103672 022023 5 ustar 00vajda wheel 000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/store/ 000755 000765 000000 00000000000 12413103672 021712 5 ustar 00vajda wheel 000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/util/ 000755 000765 000000 00000000000 12413103672 021533 5 ustar 00vajda wheel 000000 000000 pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonAttribute.java 000644 000765 000000 00000001526 11776645223 025565 0 ustar 00vajda wheel 000000 000000 /* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.util;
import org.apache.lucene.util.Attribute;
public interface PythonAttribute extends Attribute {
}
pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonAttributeImpl.java 000644 000765 000000 00000002614 11776645223 026406 0 ustar 00vajda wheel 000000 000000 /* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.util;
import org.apache.lucene.util.AttributeImpl;
public class PythonAttributeImpl extends AttributeImpl {
private long pythonObject;
public PythonAttributeImpl()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native void clear();
public native void copyTo(AttributeImpl target);
public native boolean equals(Object obj);
public native int hashCode();
}
pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonComparable.java 000644 000765 000000 00000002337 11140022613 025641 0 ustar 00vajda wheel 000000 000000 /* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.util;
public class PythonComparable implements Comparable {
private long pythonObject;
public PythonComparable()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native int compareTo(Object o);
}
pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonIterator.java 000644 000765 000000 00000002556 11140022613 025370 0 ustar 00vajda wheel 000000 000000 /* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.util;
import java.util.Iterator;
public class PythonIterator implements Iterator {
private long pythonObject;
public PythonIterator()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean hasNext();
public native Object next();
public void remove()
{
throw new UnsupportedOperationException();
}
}
pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonList.java 000644 000765 000000 00000006624 11776052737 024543 0 ustar 00vajda wheel 000000 000000 /* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.util;
import java.util.List;
import java.util.ListIterator;
import java.util.Collection;
import java.util.Iterator;
import java.lang.reflect.Array;
public class PythonList implements List {
private long pythonObject;
public PythonList()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean add(Object obj);
public native void add(int index, Object obj);
public native boolean addAll(Collection c);
public native boolean addAll(int index, Collection c);
public native void clear();
public native boolean contains(Object obj);
public native boolean containsAll(Collection c);
public native boolean equals(Object obj);
public native Object get(int index);
// public native int hashCode();
public native int indexOf(Object obj);
public native boolean isEmpty();
public native Iterator iterator();
public native int lastIndexOf(Object obj);
public native ListIterator listIterator(int index);
public ListIterator listIterator()
{
return listIterator(0);
}
private native Object removeAt(int index);
public Object remove(int index)
throws IndexOutOfBoundsException
{
if (index < 0 || index >= this.size())
throw new IndexOutOfBoundsException();
return removeAt(index);
}
private native boolean removeObject(Object obj);
public boolean remove(Object obj)
{
return removeObject(obj);
}
public native boolean removeAll(Collection c);
public native boolean retainAll(Collection c);
public native Object set(int index, Object obj);
public native int size();
private native List subListChecked(int fromIndex, int toIndex);
public List subList(int fromIndex, int toIndex)
throws IndexOutOfBoundsException, IllegalArgumentException
{
// per the List.subList contract, toIndex may equal size()
if (fromIndex < 0 || toIndex > size() || fromIndex > toIndex)
throw new IndexOutOfBoundsException();
return subListChecked(fromIndex, toIndex);
}
public native Object[] toArray();
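// The typed variant delegates to the native no-argument toArray() and,
// per the Collection.toArray contract, grows the caller's array via
// reflection when it is too small to hold the result.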
public Object[] toArray(Object[] a)
{
Object[] array = toArray();
if (a.length < array.length)
a = (Object[]) Array.newInstance(a.getClass().getComponentType(),
array.length);
System.arraycopy(array, 0, a, 0, array.length);
return a;
}
}
pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonListIterator.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.util;
import java.util.ListIterator;
public class PythonListIterator extends PythonIterator implements ListIterator {
public native boolean hasPrevious();
public native Object previous();
public native int nextIndex();
public native int previousIndex();
public native void set(Object obj);
public native void add(Object obj);
public native void remove();
}
pylucene-4.10.1-1/java/org/apache/pylucene/util/PythonSet.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.util;
import java.util.Set;
import java.util.Collection;
import java.util.Iterator;
import java.lang.reflect.Array;
public class PythonSet implements Set {
private long pythonObject;
public PythonSet()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean add(Object obj);
public native boolean addAll(Collection c);
public native void clear();
public native boolean contains(Object obj);
public native boolean containsAll(Collection c);
public native boolean equals(Object obj);
public native boolean isEmpty();
public native Iterator iterator();
public native boolean remove(Object obj);
public native boolean removeAll(Collection c);
public native boolean retainAll(Collection c);
public native int size();
public native Object[] toArray();
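// Same reflection-based growth strategy as PythonList.toArray(Object[]).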
public Object[] toArray(Object[] a)
{
Object[] array = toArray();
if (a.length < array.length)
a = (Object[]) Array.newInstance(a.getClass().getComponentType(),
array.length);
System.arraycopy(array, 0, a, 0, array.length);
return a;
}
}
pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonDirectory.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.store;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.Lock;
public class PythonDirectory extends Directory {
private long pythonObject;
public PythonDirectory()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
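// The bulk sync delegates each file name to the per-file native
// sync(String) implemented on the Python side.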
public void sync(Collection<String> names)
throws IOException
{
for (String name : names)
sync(name);
}
public native void pythonDecRef();
public native void close()
throws IOException;
public native IndexOutput createOutput(String name, IOContext context)
throws IOException;
public native void deleteFile(String name)
throws IOException;
public native boolean fileExists(String name)
throws IOException;
public native long fileLength(String name)
throws IOException;
public native long fileModified(String name)
throws IOException;
public native String[] listAll()
throws IOException;
public native IndexInput openInput(String name, IOContext context)
throws IOException;
public native void touchFile(String name)
throws IOException;
public native void sync(String name)
throws IOException;
public native LockFactory getLockFactory();
public native void setLockFactory(LockFactory lockFactory)
throws IOException;
public native void clearLock(String name)
throws IOException;
public native Lock makeLock(String name);
}
pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonIndexInput.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.store;
import java.io.IOException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.IOContext;
public class PythonIndexInput extends BufferedIndexInput {
private long pythonObject;
public PythonIndexInput(String resourceDesc)
{
super(resourceDesc);
}
public PythonIndexInput(String resourceDesc, int bufferSize)
{
super(resourceDesc, bufferSize);
}
public PythonIndexInput(String resourceDesc, IOContext context)
{
super(resourceDesc, context);
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native PythonIndexInput clone();
public native long length();
public native void close()
throws IOException;
public native byte[] readInternal(int length, long pos)
throws IOException;
public native void seekInternal(long pos)
throws IOException;
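// BufferedIndexInput's protected readInternal(byte[], int, int) is
// bridged to the byte[]-returning native overload above, which is
// simpler to implement from Python; the bytes read are copied back
// into the caller's buffer.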
protected void readInternal(byte[] b, int offset, int length)
throws IOException
{
byte[] data = readInternal(length, getFilePointer());
System.arraycopy(data, 0, b, offset, data.length);
}
}
pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonIndexOutput.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.store;
import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
public class PythonIndexOutput extends IndexOutput {
private long pythonObject;
public PythonIndexOutput()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public void flush()
throws IOException
{}
public native long getFilePointer();
public native long getChecksum()
throws IOException;
public native void close()
throws IOException;
public native void writeByte(byte b)
throws IOException;
public native void writeBytes(byte[] bytes)
throws IOException;
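// When only a sub-range is requested, copy it into a fresh array so the
// native writeBytes(byte[]) override only ever sees whole arrays.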
public void writeBytes(byte[] bytes, int offset, int length)
throws IOException
{
if (offset > 0 || length < bytes.length)
{
byte[] data = new byte[length];
System.arraycopy(bytes, offset, data, 0, length);
writeBytes(data);
}
else
writeBytes(bytes);
}
}
pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonLock.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.store;
import org.apache.lucene.store.Lock;
public class PythonLock extends Lock {
private long pythonObject;
public PythonLock()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean isLocked();
public native boolean obtain();
public native void release();
public native void close();
}
pylucene-4.10.1-1/java/org/apache/pylucene/store/PythonLockFactory.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.store;
import java.io.IOException;
import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockFactory;
public class PythonLockFactory extends LockFactory {
private long pythonObject;
public PythonLockFactory()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native Lock makeLock(String lockName);
public native void clearLock(String lockName)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonByteParser.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;
/**
* @author Andi Vajda
*/
public class PythonByteParser implements FieldCache.ByteParser {
private long pythonObject;
public PythonByteParser()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native byte parseByte(BytesRef ref);
public native TermsEnum termsEnum(Terms terms)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonCollector.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.index.AtomicReaderContext;
public class PythonCollector extends Collector {
private long pythonObject;
public PythonCollector()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
protected Scorer scorer;
public void setScorer(Scorer scorer)
throws IOException
{
this.scorer = scorer;
}
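// collect(int) computes the score up front from the Scorer captured in
// setScorer() and forwards (doc, score) to the native collect(), so the
// Python subclass does not have to call back into the Scorer per hit.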
public void collect(int doc)
throws IOException
{
collect(doc, scorer.score());
}
public native void pythonDecRef();
public native void collect(int doc, float score)
throws IOException;
public native void setNextReader(AtomicReaderContext context)
throws IOException;
public native boolean acceptsDocsOutOfOrder();
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonDoubleParser.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;
/**
* @author Andi Vajda
*/
public class PythonDoubleParser implements FieldCache.DoubleParser {
private long pythonObject;
public PythonDoubleParser()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native double parseDouble(BytesRef ref);
public native TermsEnum termsEnum(Terms terms)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonFieldComparator.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.index.AtomicReaderContext;
/**
* @author Andi Vajda
*/
public class PythonFieldComparator<T> extends FieldComparator<T> {
private long pythonObject;
public PythonFieldComparator()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native int compare(int slot1, int slot2);
public native int compareBottom(int doc)
throws IOException;
public native int compareTop(int doc)
throws IOException;
public native void setBottom(final int slot);
public native void setTopValue(T value);
public native void copy(int slot, int doc)
throws IOException;
public native FieldComparator<T> setNextReader(AtomicReaderContext context)
throws IOException;
public native T value(int slot);
public native int compareDocToValue(int doc, T value)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonFieldComparatorSource.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.index.IndexReader;
/**
* @author Andi Vajda
*/
public class PythonFieldComparatorSource extends FieldComparatorSource {
private long pythonObject;
public PythonFieldComparatorSource()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native FieldComparator newComparator(String fieldname, int numHits,
int sortPos, boolean reversed)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonFilter.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.index.AtomicReaderContext;
public class PythonFilter extends Filter {
private long pythonObject;
public PythonFilter()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonFloatParser.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;
/**
* @author Andi Vajda
*/
public class PythonFloatParser implements FieldCache.FloatParser {
private long pythonObject;
public PythonFloatParser()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native float parseFloat(BytesRef ref);
public native TermsEnum termsEnum(Terms terms)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonIntParser.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;
/**
* @author Andi Vajda
*/
public class PythonIntParser implements FieldCache.IntParser {
private long pythonObject;
public PythonIntParser()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native int parseInt(BytesRef ref);
public native TermsEnum termsEnum(Terms terms)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonLongParser.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;
/**
* @author Andi Vajda
*/
public class PythonLongParser implements FieldCache.LongParser {
private long pythonObject;
public PythonLongParser()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native long parseLong(BytesRef ref);
public native TermsEnum termsEnum(Terms terms)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/PythonShortParser.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search;
import java.io.IOException;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.BytesRef;
/**
* @author Andi Vajda
*/
public class PythonShortParser implements FieldCache.ShortParser {
private long pythonObject;
public PythonShortParser()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native short parseShort(BytesRef ref);
public native TermsEnum termsEnum(Terms terms)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/spans/PythonSpans.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search.spans;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.search.spans.Spans;
public class PythonSpans extends Spans {
private long pythonObject;
public PythonSpans()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean next()
throws IOException;
public native boolean skipTo(int target)
throws IOException;
public native int doc();
public native int start();
public native int end();
public native Collection<byte[]> getPayload()
throws IOException;
public native boolean isPayloadAvailable()
throws IOException;
public native long cost();
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/similarities/PythonDefaultSimilarity.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search.similarities;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.index.FieldInvertState;
public class PythonDefaultSimilarity extends DefaultSimilarity {
private long pythonObject;
public PythonDefaultSimilarity()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native float queryNorm(float sumOfSquaredWeights);
public native float coord(int overlap, int maxOverlap);
public native float lengthNorm(FieldInvertState state);
public native float tf(float freq);
public native float sloppyFreq(int distance);
public native float idf(long docFreq, long numDocs);
public native Explanation idfExplain(CollectionStatistics collectionStats,
TermStatistics[] stats);
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/highlight/PythonFormatter.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search.highlight;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.TokenGroup;
public class PythonFormatter implements Formatter {
private long pythonObject;
public PythonFormatter()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native String highlightTerm(String originalText,
TokenGroup tokenGroup);
}
pylucene-4.10.1-1/java/org/apache/pylucene/search/highlight/PythonFragmenter.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.search.highlight;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.analysis.TokenStream;
public class PythonFragmenter implements Fragmenter {
private long pythonObject;
public PythonFragmenter()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean isNewFragment();
public native void start(String originalText, TokenStream tokenStream);
}
pylucene-4.10.1-1/java/org/apache/pylucene/queryparser/classic/PythonMultiFieldQueryParser.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.queryparser.classic;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.Version;
public class PythonMultiFieldQueryParser extends MultiFieldQueryParser {
private long pythonObject;
public PythonMultiFieldQueryParser(Version version, String[] fields,
Analyzer analyzer)
{
super(version, fields, analyzer);
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native Query getBooleanQuery(List clauses, boolean disableCoord);
public native Query getFuzzyQuery(String field, String termText,
float minSimilarity);
public native Query getPrefixQuery(String field, String termText);
public native Query getRangeQuery(String field,
String part1, String part2,
boolean startInclusive,
boolean endInclusive);
public native Query getWildcardQuery(String field, String termText);
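// The two protected getFieldQuery overloads are exposed under distinct
// names (_quoted/_slop) so a Python subclass can override each one
// individually; the *_super variants below let such overrides fall back
// to the superclass behavior.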
public native Query getFieldQuery_quoted(String field, String queryText,
boolean quoted);
public native Query getFieldQuery_slop(String field, String queryText,
int slop);
public Query getFieldQuery_quoted_super(String field, String queryText,
boolean quoted)
throws ParseException
{
return super.getFieldQuery(field, queryText, quoted);
}
public Query getFieldQuery_slop_super(String field, String queryText,
int slop)
throws ParseException
{
return super.getFieldQuery(field, queryText, slop);
}
public Query getFieldQuery(String field, String queryText, boolean quoted)
{
return getFieldQuery_quoted(field, queryText, quoted);
}
public Query getFieldQuery(String field, String queryText, int slop)
{
return getFieldQuery_slop(field, queryText, slop);
}
}
pylucene-4.10.1-1/java/org/apache/pylucene/queryparser/classic/PythonQueryParser.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.queryparser.classic;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.queryparser.classic.CharStream;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.util.Version;
public class PythonQueryParser extends QueryParser {
private long pythonObject;
public PythonQueryParser(Version version, String field, Analyzer analyzer)
{
super(version, field, analyzer);
}
public PythonQueryParser(CharStream stream)
{
super(stream);
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native Query getBooleanQuery(List clauses, boolean disableCoord);
public native Query getFuzzyQuery(String field, String termText,
float minSimilarity);
public native Query getPrefixQuery(String field, String termText);
public native Query getRangeQuery(String field,
String part1, String part2,
boolean startInclusive,
boolean endInclusive);
public native Query getWildcardQuery(String field, String termText);
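// Same getFieldQuery overload-renaming pattern as in
// PythonMultiFieldQueryParser above.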
public native Query getFieldQuery_quoted(String field, String queryText,
boolean quoted);
public native Query getFieldQuery_slop(String field, String queryText,
int slop);
public Query getFieldQuery_quoted_super(String field, String queryText,
boolean quoted)
throws ParseException
{
return super.getFieldQuery(field, queryText, quoted);
}
public Query getFieldQuery_slop_super(String field, String queryText,
int slop)
throws ParseException
{
return super.getFieldQuery(field, queryText, slop);
}
public Query getFieldQuery(String field, String queryText, boolean quoted)
{
return getFieldQuery_quoted(field, queryText, quoted);
}
public Query getFieldQuery(String field, String queryText, int slop)
{
return getFieldQuery_slop(field, queryText, slop);
}
}
pylucene-4.10.1-1/java/org/apache/pylucene/index/PythonIndexDeletionPolicy.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.index;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexDeletionPolicy;
public class PythonIndexDeletionPolicy extends IndexDeletionPolicy {
private long pythonObject;
public PythonIndexDeletionPolicy()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native void onInit(List<? extends IndexCommit> commits)
throws IOException;
public native void onCommit(List<? extends IndexCommit> commits)
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonAnalyzer.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.analysis;
import org.apache.lucene.analysis.Analyzer;
import java.io.Reader;
public class PythonAnalyzer extends Analyzer {
private long pythonObject;
public PythonAnalyzer()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native TokenStreamComponents createComponents(final String fieldName, final Reader reader);
}
pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonCharTokenizer.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.analysis;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.util.Version;
public class PythonCharTokenizer extends CharTokenizer {
private long pythonObject;
public PythonCharTokenizer(Version version, Reader reader)
{
super(version, reader);
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean isTokenChar(int c);
public native int normalize(int c);
}
pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonFilteringTokenFilter.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.analysis;
import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
import java.io.IOException;
public class PythonFilteringTokenFilter extends FilteringTokenFilter {
private long pythonObject;
public PythonFilteringTokenFilter(Version version, TokenStream tokenStream)
{
super(version, tokenStream);
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean accept()
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonTokenFilter.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import java.io.IOException;
public class PythonTokenFilter extends TokenFilter {
private long pythonObject;
public PythonTokenFilter(TokenStream tokenStream)
{
super(tokenStream);
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean incrementToken()
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonTokenizer.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.analysis;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.Token;
import java.io.IOException;
import java.io.Reader;
public class PythonTokenizer extends Tokenizer {
private long pythonObject;
public PythonTokenizer(Reader reader)
{
super(reader);
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean incrementToken()
throws IOException;
}
pylucene-4.10.1-1/java/org/apache/pylucene/analysis/PythonTokenStream.java
/* ====================================================================
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.pylucene.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import java.io.IOException;
public class PythonTokenStream extends TokenStream {
private long pythonObject;
public PythonTokenStream()
{
}
public void pythonExtension(long pythonObject)
{
this.pythonObject = pythonObject;
}
public long pythonExtension()
{
return this.pythonObject;
}
public void finalize()
throws Throwable
{
pythonDecRef();
}
public native void pythonDecRef();
public native boolean incrementToken()
throws IOException;
public native void end()
throws IOException;
public native void reset()
throws IOException;
public native void close()
throws IOException;
}